diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index a283f8d5ca..54f3f42a25 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -49,8 +49,8 @@ jobs: if: steps.changed-files.outputs.any_changed == 'true' run: | uv run --directory api ruff --version - uv run --directory api ruff check --diff ./ - uv run --directory api ruff format --check --diff ./ + uv run --directory api ruff check ./ + uv run --directory api ruff format --check ./ - name: Dotenv check if: steps.changed-files.outputs.any_changed == 'true' diff --git a/api/.env.example b/api/.env.example index 80b1c12cd8..18f2dbf647 100644 --- a/api/.env.example +++ b/api/.env.example @@ -4,6 +4,11 @@ # Alternatively you can set it with `SECRET_KEY` environment variable. SECRET_KEY= +# Ensure UTF-8 encoding +LANG=en_US.UTF-8 +LC_ALL=en_US.UTF-8 +PYTHONIOENCODING=utf-8 + # Console API base URL CONSOLE_API_URL=http://localhost:5001 CONSOLE_WEB_URL=http://localhost:3000 diff --git a/api/.ruff.toml b/api/.ruff.toml index 0169613bf8..db6872b9c8 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -42,6 +42,8 @@ select = [ "S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers. "S302", # suspicious-marshal-usage, disallow use of `marshal` module "S311", # suspicious-non-cryptographic-random-usage + "G001", # don't use str format to logging messages + "G004", # don't use f-strings to format logging messages ] ignore = [ diff --git a/api/Dockerfile b/api/Dockerfile index 8c7a1717b9..e097b5811e 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -37,6 +37,11 @@ EXPOSE 5001 # set timezone ENV TZ=UTC +# Set UTF-8 locale +ENV LANG=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 +ENV PYTHONIOENCODING=utf-8 + WORKDIR /app/api RUN \ diff --git a/api/app_factory.py b/api/app_factory.py index 3a258be28f..81155cbacd 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -32,7 +32,7 @@ def create_app() -> DifyApp: initialize_extensions(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info(f"Finished create_app ({round((end_time - start_time) * 1000, 2)} ms)") + logging.info("Finished create_app (%s ms)", round((end_time - start_time) * 1000, 2)) return app @@ -91,14 +91,14 @@ def initialize_extensions(app: DifyApp): is_enabled = ext.is_enabled() if hasattr(ext, "is_enabled") else True if not is_enabled: if dify_config.DEBUG: - logging.info(f"Skipped {short_name}") + logging.info("Skipped %s", short_name) continue start_time = time.perf_counter() ext.init_app(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info(f"Loaded {short_name} ({round((end_time - start_time) * 1000, 2)} ms)") + logging.info("Loaded %s (%s ms)", short_name, round((end_time - start_time) * 1000, 2)) def create_migrations_app(): diff --git a/api/commands.py b/api/commands.py index c2e62ec261..79bb6713d0 100644 --- a/api/commands.py +++ b/api/commands.py @@ -53,13 +53,13 @@ def reset_password(email, new_password, password_confirm): account = db.session.query(Account).where(Account.email == email).one_or_none() if not account: - click.echo(click.style("Account not found for email: {}".format(email), fg="red")) + click.echo(click.style(f"Account not found for email: {email}", fg="red")) return try: valid_password(new_password) except: - click.echo(click.style("Invalid password. Must match {}".format(password_pattern), fg="red")) + click.echo(click.style(f"Invalid password. 
Must match {password_pattern}", fg="red")) return # generate password salt @@ -92,13 +92,13 @@ def reset_email(email, new_email, email_confirm): account = db.session.query(Account).where(Account.email == email).one_or_none() if not account: - click.echo(click.style("Account not found for email: {}".format(email), fg="red")) + click.echo(click.style(f"Account not found for email: {email}", fg="red")) return try: email_validate(new_email) except: - click.echo(click.style("Invalid email: {}".format(new_email), fg="red")) + click.echo(click.style(f"Invalid email: {new_email}", fg="red")) return account.email = new_email @@ -142,7 +142,7 @@ def reset_encrypt_key_pair(): click.echo( click.style( - "Congratulations! The asymmetric key pair of workspace {} has been reset.".format(tenant.id), + f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.", fg="green", ) ) @@ -190,14 +190,14 @@ def migrate_annotation_vector_database(): f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped." ) try: - click.echo("Creating app annotation index: {}".format(app.id)) + click.echo(f"Creating app annotation index: {app.id}") app_annotation_setting = ( db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() ) if not app_annotation_setting: skipped_count = skipped_count + 1 - click.echo("App annotation setting disabled: {}".format(app.id)) + click.echo(f"App annotation setting disabled: {app.id}") continue # get dataset_collection_binding info dataset_collection_binding = ( @@ -206,7 +206,7 @@ def migrate_annotation_vector_database(): .first() ) if not dataset_collection_binding: - click.echo("App annotation collection binding not found: {}".format(app.id)) + click.echo(f"App annotation collection binding not found: {app.id}") continue annotations = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app.id).all() dataset = Dataset( @@ -252,9 +252,7 @@ def migrate_annotation_vector_database(): create_count += 1 except Exception as e: click.echo( - click.style( - "Error creating app annotation index: {} {}".format(e.__class__.__name__, str(e)), fg="red" - ) + click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red") ) continue @@ -319,7 +317,7 @@ def migrate_knowledge_vector_database(): f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped." 
) try: - click.echo("Creating dataset vector database index: {}".format(dataset.id)) + click.echo(f"Creating dataset vector database index: {dataset.id}") if dataset.index_struct_dict: if dataset.index_struct_dict["type"] == vector_type: skipped_count = skipped_count + 1 @@ -423,9 +421,7 @@ def migrate_knowledge_vector_database(): create_count += 1 except Exception as e: db.session.rollback() - click.echo( - click.style("Error creating dataset index: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"Error creating dataset index: {e.__class__.__name__} {str(e)}", fg="red")) continue click.echo( @@ -476,7 +472,7 @@ def convert_to_agent_apps(): break for app in apps: - click.echo("Converting app: {}".format(app.id)) + click.echo(f"Converting app: {app.id}") try: app.mode = AppMode.AGENT_CHAT.value @@ -488,11 +484,11 @@ def convert_to_agent_apps(): ) db.session.commit() - click.echo(click.style("Converted app: {}".format(app.id), fg="green")) + click.echo(click.style(f"Converted app: {app.id}", fg="green")) except Exception as e: - click.echo(click.style("Convert app error: {} {}".format(e.__class__.__name__, str(e)), fg="red")) + click.echo(click.style(f"Convert app error: {e.__class__.__name__} {str(e)}", fg="red")) - click.echo(click.style("Conversion complete. Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green")) + click.echo(click.style(f"Conversion complete. Converted {len(proceeded_app_ids)} agent apps.", fg="green")) @click.command("add-qdrant-index", help="Add Qdrant index.") @@ -665,7 +661,7 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str click.echo( click.style( - "Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password), + f"Account and tenant created.\nAccount: {email}\nPassword: {new_password}", fg="green", ) ) @@ -726,16 +722,16 @@ where sites.id is null limit 1000""" if tenant: accounts = tenant.get_accounts() if not accounts: - print("Fix failed for app {}".format(app.id)) + print(f"Fix failed for app {app.id}") continue account = accounts[0] - print("Fixing missing site for app {}".format(app.id)) + print(f"Fixing missing site for app {app.id}") app_was_created.send(app, account=account) except Exception: failed_app_ids.append(app_id) - click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red")) - logging.exception(f"Failed to fix app related site missing issue, app_id: {app_id}") + click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red")) + logging.exception("Failed to fix app related site missing issue, app_id: %s", app_id) continue if not processed_count: diff --git a/api/configs/app_config.py b/api/configs/app_config.py index 20f8c40427..d3b1cf9d5b 100644 --- a/api/configs/app_config.py +++ b/api/configs/app_config.py @@ -41,7 +41,7 @@ class RemoteSettingsSourceFactory(PydanticBaseSettingsSource): case RemoteSettingsSourceName.NACOS: remote_source = NacosSettingsSource(current_state) case _: - logger.warning(f"Unsupported remote source: {remote_source_name}") + logger.warning("Unsupported remote source: %s", remote_source_name) return {} d: dict[str, Any] = {} diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 587ea55ca7..68b16e48db 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -245,11 +245,7 @@ class CeleryConfig(DatabaseConfig): @computed_field def CELERY_RESULT_BACKEND(self) -> str | None: - return ( - 
"db+{}".format(self.SQLALCHEMY_DATABASE_URI) - if self.CELERY_BACKEND == "database" - else self.CELERY_BROKER_URL - ) + return f"db+{self.SQLALCHEMY_DATABASE_URI}" if self.CELERY_BACKEND == "database" else self.CELERY_BROKER_URL @property def BROKER_USE_SSL(self) -> bool: diff --git a/api/configs/remote_settings_sources/apollo/client.py b/api/configs/remote_settings_sources/apollo/client.py index 88b30d3987..877ff8409f 100644 --- a/api/configs/remote_settings_sources/apollo/client.py +++ b/api/configs/remote_settings_sources/apollo/client.py @@ -76,7 +76,7 @@ class ApolloClient: code, body = http_request(url, timeout=3, headers=self._sign_headers(url)) if code == 200: if not body: - logger.error(f"get_json_from_net load configs failed, body is {body}") + logger.error("get_json_from_net load configs failed, body is %s", body) return None data = json.loads(body) data = data["configurations"] @@ -207,7 +207,7 @@ class ApolloClient: # if the length is 0 it is returned directly if len(notifications) == 0: return - url = "{}/notifications/v2".format(self.config_url) + url = f"{self.config_url}/notifications/v2" params = { "appId": self.app_id, "cluster": self.cluster, @@ -222,7 +222,7 @@ class ApolloClient: return if http_code == 200: if not body: - logger.error(f"_long_poll load configs failed,body is {body}") + logger.error("_long_poll load configs failed,body is %s", body) return data = json.loads(body) for entry in data: @@ -273,12 +273,12 @@ class ApolloClient: time.sleep(60 * 10) # 10 minutes def _do_heart_beat(self, namespace): - url = "{}/configs/{}/{}/{}?ip={}".format(self.config_url, self.app_id, self.cluster, namespace, self.ip) + url = f"{self.config_url}/configs/{self.app_id}/{self.cluster}/{namespace}?ip={self.ip}" try: code, body = http_request(url, timeout=3, headers=self._sign_headers(url)) if code == 200: if not body: - logger.error(f"_do_heart_beat load configs failed,body is {body}") + logger.error("_do_heart_beat load configs failed,body is %s", body) return None data = json.loads(body) if self.last_release_key == data["releaseKey"]: diff --git a/api/configs/remote_settings_sources/apollo/utils.py b/api/configs/remote_settings_sources/apollo/utils.py index 6136112e03..f5b82908ee 100644 --- a/api/configs/remote_settings_sources/apollo/utils.py +++ b/api/configs/remote_settings_sources/apollo/utils.py @@ -24,7 +24,7 @@ def url_encode_wrapper(params): def no_key_cache_key(namespace, key): - return "{}{}{}".format(namespace, len(namespace), key) + return f"{namespace}{len(namespace)}{key}" # Returns whether the obtained value is obtained, and None if it does not diff --git a/api/constants/languages.py b/api/constants/languages.py index 1157ec4307..ab19392c59 100644 --- a/api/constants/languages.py +++ b/api/constants/languages.py @@ -28,5 +28,5 @@ def supported_language(lang): if lang in languages: return lang - error = "{lang} is not a valid language.".format(lang=lang) + error = f"{lang} is not a valid language." 
raise ValueError(error) diff --git a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py index 2b48afd550..c2ba880405 100644 --- a/api/controllers/console/app/annotation.py +++ b/api/controllers/console/app/annotation.py @@ -86,7 +86,7 @@ class AnnotationReplyActionStatusApi(Resource): raise Forbidden() job_id = str(job_id) - app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id)) + app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}" cache_result = redis_client.get(app_annotation_job_key) if cache_result is None: raise ValueError("The job does not exist.") @@ -94,7 +94,7 @@ class AnnotationReplyActionStatusApi(Resource): job_status = cache_result.decode() error_msg = "" if job_status == "error": - app_annotation_error_key = "{}_app_annotation_error_{}".format(action, str(job_id)) + app_annotation_error_key = f"{action}_app_annotation_error_{str(job_id)}" error_msg = redis_client.get(app_annotation_error_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 @@ -123,6 +123,17 @@ class AnnotationListApi(Resource): } return response, 200 + @setup_required + @login_required + @account_initialization_required + def delete(self, app_id): + if not current_user.is_editor: + raise Forbidden() + + app_id = str(app_id) + AppAnnotationService.clear_all_annotations(app_id) + return {"result": "success"}, 204 + class AnnotationExportApi(Resource): @setup_required @@ -223,14 +234,14 @@ class AnnotationBatchImportStatusApi(Resource): raise Forbidden() job_id = str(job_id) - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" cache_result = redis_client.get(indexing_cache_key) if cache_result is None: raise ValueError("The job does not exist.") job_status = cache_result.decode() error_msg = "" if job_status == "error": - indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id)) + indexing_error_msg_key = f"app_annotation_batch_import_error_msg_{str(job_id)}" error_msg = redis_client.get(indexing_error_msg_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index b5b6d1f75b..6ddae6fad5 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -51,8 +51,8 @@ class CompletionConversationApi(Resource): if args["keyword"]: query = query.join(Message, Message.conversation_id == Conversation.id).where( or_( - Message.query.ilike("%{}%".format(args["keyword"])), - Message.answer.ilike("%{}%".format(args["keyword"])), + Message.query.ilike(f"%{args['keyword']}%"), + Message.answer.ilike(f"%{args['keyword']}%"), ) ) @@ -174,7 +174,7 @@ class ChatConversationApi(Resource): query = db.select(Conversation).where(Conversation.app_id == app_model.id) if args["keyword"]: - keyword_filter = "%{}%".format(args["keyword"]) + keyword_filter = f"%{args['keyword']}%" query = ( query.join( Message, diff --git a/api/controllers/console/app/generator.py b/api/controllers/console/app/generator.py index 790369c052..4847a2cab8 100644 --- a/api/controllers/console/app/generator.py +++ b/api/controllers/console/app/generator.py @@ -1,5 +1,3 @@ -import os - from flask_login import current_user from flask_restful import Resource, reqparse @@ -29,15 +27,12 @@ class RuleGenerateApi(Resource): args = 
parser.parse_args() account = current_user - PROMPT_GENERATION_MAX_TOKENS = int(os.getenv("PROMPT_GENERATION_MAX_TOKENS", "512")) - try: rules = LLMGenerator.generate_rule_config( tenant_id=account.current_tenant_id, instruction=args["instruction"], model_config=args["model_config"], no_variable=args["no_variable"], - rule_config_max_tokens=PROMPT_GENERATION_MAX_TOKENS, ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) @@ -64,14 +59,12 @@ class RuleCodeGenerateApi(Resource): args = parser.parse_args() account = current_user - CODE_GENERATION_MAX_TOKENS = int(os.getenv("CODE_GENERATION_MAX_TOKENS", "1024")) try: code_result = LLMGenerator.generate_code( tenant_id=account.current_tenant_id, instruction=args["instruction"], model_config=args["model_config"], code_language=args["code_language"], - max_tokens=CODE_GENERATION_MAX_TOKENS, ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) diff --git a/api/controllers/console/auth/data_source_oauth.py b/api/controllers/console/auth/data_source_oauth.py index 4c9697cc32..4940b48754 100644 --- a/api/controllers/console/auth/data_source_oauth.py +++ b/api/controllers/console/auth/data_source_oauth.py @@ -81,7 +81,7 @@ class OAuthDataSourceBinding(Resource): oauth_provider.get_access_token(code) except requests.exceptions.HTTPError as e: logging.exception( - f"An error occurred during the OAuthCallback process with {provider}: {e.response.text}" + "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text ) return {"error": "OAuth data source process failed"}, 400 @@ -103,7 +103,9 @@ class OAuthDataSourceSync(Resource): try: oauth_provider.sync_data_source(binding_id) except requests.exceptions.HTTPError as e: - logging.exception(f"An error occurred during the OAuthCallback process with {provider}: {e.response.text}") + logging.exception( + "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text + ) return {"error": "OAuth data source process failed"}, 400 return {"result": "success"}, 200 diff --git a/api/controllers/console/auth/error.py b/api/controllers/console/auth/error.py index 8c5e23de58..1984339add 100644 --- a/api/controllers/console/auth/error.py +++ b/api/controllers/console/auth/error.py @@ -113,3 +113,9 @@ class MemberNotInTenantError(BaseHTTPException): error_code = "member_not_in_tenant" description = "The member is not in the workspace." code = 400 + + +class AccountInFreezeError(BaseHTTPException): + error_code = "account_in_freeze" + description = "This email is temporarily unavailable." 
+ code = 400 diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index d0a4f3ff6d..4a6cb99390 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -80,7 +80,7 @@ class OAuthCallback(Resource): user_info = oauth_provider.get_user_info(token) except requests.exceptions.RequestException as e: error_text = e.response.text if e.response else str(e) - logging.exception(f"An error occurred during the OAuth process with {provider}: {error_text}") + logging.exception("An error occurred during the OAuth process with %s: %s", provider, error_text) return {"error": "OAuth process failed"}, 400 if invite_token and RegisterService.is_valid_invite_token(invite_token): diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index d14b208a4b..b6e91dd98e 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -970,7 +970,7 @@ class DocumentRetryApi(DocumentResource): raise DocumentAlreadyFinishedError() retry_documents.append(document) except Exception: - logging.exception(f"Failed to retry document, document id: {document_id}") + logging.exception("Failed to retry document, document id: %s", document_id) continue # retry document DocumentService.retry_document(dataset_id, retry_documents) diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index b3704ce8b1..8c429044d7 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -1,6 +1,5 @@ import uuid -import pandas as pd from flask import request from flask_login import current_user from flask_restful import Resource, marshal, reqparse @@ -14,8 +13,6 @@ from controllers.console.datasets.error import ( ChildChunkDeleteIndexError, ChildChunkIndexingError, InvalidActionError, - NoFileUploadedError, - TooManyFilesError, ) from controllers.console.wraps import ( account_initialization_required, @@ -32,6 +29,7 @@ from extensions.ext_redis import redis_client from fields.segment_fields import child_chunk_fields, segment_fields from libs.login import login_required from models.dataset import ChildChunk, DocumentSegment +from models.model import UploadFile from services.dataset_service import DatasetService, DocumentService, SegmentService from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError @@ -184,7 +182,7 @@ class DatasetDocumentSegmentApi(Resource): raise ProviderNotInitializeError(ex.description) segment_ids = request.args.getlist("segment_id") - document_indexing_cache_key = "document_{}_indexing".format(document.id) + document_indexing_cache_key = f"document_{document.id}_indexing" cache_result = redis_client.get(document_indexing_cache_key) if cache_result is not None: raise InvalidActionError("Document is being indexed, please try again later") @@ -365,37 +363,28 @@ class DatasetDocumentSegmentBatchImportApi(Resource): document = DocumentService.get_document(dataset_id, document_id) if not document: raise NotFound("Document not found.") - # get file from request - file = request.files["file"] - # check file - if "file" not in request.files: - raise NoFileUploadedError() - if len(request.files) > 1: - raise TooManyFilesError() + parser = 
reqparse.RequestParser() + parser.add_argument("upload_file_id", type=str, required=True, nullable=False, location="json") + args = parser.parse_args() + upload_file_id = args["upload_file_id"] + + upload_file = db.session.query(UploadFile).where(UploadFile.id == upload_file_id).first() + if not upload_file: + raise NotFound("UploadFile not found.") + # check file type - if not file.filename or not file.filename.lower().endswith(".csv"): + if not upload_file.name or not upload_file.name.lower().endswith(".csv"): raise ValueError("Invalid file type. Only CSV files are allowed") try: - # Skip the first row - df = pd.read_csv(file) - result = [] - for index, row in df.iterrows(): - if document.doc_form == "qa_model": - data = {"content": row.iloc[0], "answer": row.iloc[1]} - else: - data = {"content": row.iloc[0]} - result.append(data) - if len(result) == 0: - raise ValueError("The CSV file is empty.") # async job job_id = str(uuid.uuid4()) - indexing_cache_key = "segment_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"segment_batch_import_{str(job_id)}" # send batch add segments task redis_client.setnx(indexing_cache_key, "waiting") batch_create_segment_to_index_task.delay( - str(job_id), result, dataset_id, document_id, current_user.current_tenant_id, current_user.id + str(job_id), upload_file_id, dataset_id, document_id, current_user.current_tenant_id, current_user.id ) except Exception as e: return {"error": str(e)}, 500 @@ -406,7 +395,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource): @account_initialization_required def get(self, job_id): job_id = str(job_id) - indexing_cache_key = "segment_batch_import_{}".format(job_id) + indexing_cache_key = f"segment_batch_import_{job_id}" cache_result = redis_client.get(indexing_cache_key) if cache_result is None: raise ValueError("The job does not exist.") diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index ffdf73c368..6d9f794307 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -74,7 +74,7 @@ class InstalledAppsListApi(Resource): ): res.append(installed_app) installed_app_list = res - logger.debug(f"installed_app_list: {installed_app_list}, user_id: {user_id}") + logger.debug("installed_app_list: %s, user_id: %s", installed_app_list, user_id) installed_app_list.sort( key=lambda app: ( diff --git a/api/controllers/console/version.py b/api/controllers/console/version.py index 447cc358f8..8237ea3cdc 100644 --- a/api/controllers/console/version.py +++ b/api/controllers/console/version.py @@ -34,7 +34,7 @@ class VersionApi(Resource): try: response = requests.get(check_update_url, {"current_version": args.get("current_version")}) except Exception as error: - logging.warning("Check update version error: {}.".format(str(error))) + logging.warning("Check update version error: %s.", str(error)) result["version"] = args.get("current_version") return result @@ -55,7 +55,7 @@ def _has_new_version(*, latest_version: str, current_version: str) -> bool: # Compare versions return latest > current except version.InvalidVersion: - logging.warning(f"Invalid version format: latest={latest_version}, current={current_version}") + logging.warning("Invalid version format: latest=%s, current=%s", latest_version, current_version) return False diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 5cd2e0cd2d..45513c368d 100644 --- 
a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -9,6 +9,7 @@ from configs import dify_config from constants.languages import supported_language from controllers.console import api from controllers.console.auth.error import ( + AccountInFreezeError, EmailAlreadyInUseError, EmailChangeLimitError, EmailCodeError, @@ -479,21 +480,28 @@ class ChangeEmailResetApi(Resource): parser.add_argument("token", type=str, required=True, nullable=False, location="json") args = parser.parse_args() + if AccountService.is_account_in_freeze(args["new_email"]): + raise AccountInFreezeError() + + if not AccountService.check_email_unique(args["new_email"]): + raise EmailAlreadyInUseError() + reset_data = AccountService.get_change_email_data(args["token"]) if not reset_data: raise InvalidTokenError() AccountService.revoke_change_email_token(args["token"]) - if not AccountService.check_email_unique(args["new_email"]): - raise EmailAlreadyInUseError() - old_email = reset_data.get("old_email", "") if current_user.email != old_email: raise AccountNotFound() updated_account = AccountService.update_account(current_user, email=args["new_email"]) + AccountService.send_change_email_completed_notify_email( + email=args["new_email"], + ) + return updated_account @@ -503,6 +511,8 @@ class CheckEmailUnique(Resource): parser = reqparse.RequestParser() parser.add_argument("email", type=email, required=True, location="json") args = parser.parse_args() + if AccountService.is_account_in_freeze(args["email"]): + raise AccountInFreezeError() if not AccountService.check_email_unique(args["email"]): raise EmailAlreadyInUseError() return {"result": "success"} diff --git a/api/controllers/console/workspace/models.py b/api/controllers/console/workspace/models.py index 37d0f6c764..514d1084c4 100644 --- a/api/controllers/console/workspace/models.py +++ b/api/controllers/console/workspace/models.py @@ -73,8 +73,9 @@ class DefaultModelApi(Resource): ) except Exception as ex: logging.exception( - f"Failed to update default model, model type: {model_setting['model_type']}," - f" model:{model_setting.get('model')}" + "Failed to update default model, model type: %s, model: %s", + model_setting["model_type"], + model_setting.get("model"), ) raise ex @@ -160,8 +161,10 @@ class ModelProviderModelApi(Resource): ) except CredentialsValidateFailedError as ex: logging.exception( - f"Failed to save model credentials, tenant_id: {tenant_id}," - f" model: {args.get('model')}, model_type: {args.get('model_type')}" + "Failed to save model credentials, tenant_id: %s, model: %s, model_type: %s", + tenant_id, + args.get("model"), + args.get("model_type"), ) raise ValueError(str(ex)) diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index 595ae118ef..9b22c535f4 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -34,7 +34,7 @@ class AnnotationReplyActionStatusApi(Resource): @validate_app_token def get(self, app_model: App, job_id, action): job_id = str(job_id) - app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id)) + app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}" cache_result = redis_client.get(app_annotation_job_key) if cache_result is None: raise ValueError("The job does not exist.") @@ -42,7 +42,7 @@ class AnnotationReplyActionStatusApi(Resource): job_status = cache_result.decode() error_msg = "" if job_status == "error": - 
app_annotation_error_key = "{}_app_annotation_error_{}".format(action, str(job_id)) + app_annotation_error_key = f"{action}_app_annotation_error_{str(job_id)}" error_msg = redis_client.get(app_annotation_error_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index 1f3c218d59..ad9b625350 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -280,7 +280,7 @@ class BaseAgentRunner(AppRunner): def create_agent_thought( self, message_id: str, message: str, tool_name: str, tool_input: str, messages_ids: list[str] - ) -> MessageAgentThought: + ) -> str: """ Create agent thought """ @@ -313,16 +313,15 @@ class BaseAgentRunner(AppRunner): db.session.add(thought) db.session.commit() - db.session.refresh(thought) + agent_thought_id = str(thought.id) + self.agent_thought_count += 1 db.session.close() - self.agent_thought_count += 1 - - return thought + return agent_thought_id def save_agent_thought( self, - agent_thought: MessageAgentThought, + agent_thought_id: str, tool_name: str | None, tool_input: Union[str, dict, None], thought: str | None, @@ -335,12 +334,9 @@ class BaseAgentRunner(AppRunner): """ Save agent thought """ - updated_agent_thought = ( - db.session.query(MessageAgentThought).where(MessageAgentThought.id == agent_thought.id).first() - ) - if not updated_agent_thought: + agent_thought = db.session.query(MessageAgentThought).where(MessageAgentThought.id == agent_thought_id).first() + if not agent_thought: raise ValueError("agent thought not found") - agent_thought = updated_agent_thought if thought: agent_thought.thought += thought @@ -355,7 +351,7 @@ class BaseAgentRunner(AppRunner): except Exception: tool_input = json.dumps(tool_input) - updated_agent_thought.tool_input = tool_input + agent_thought.tool_input = tool_input if observation: if isinstance(observation, dict): @@ -364,27 +360,27 @@ class BaseAgentRunner(AppRunner): except Exception: observation = json.dumps(observation) - updated_agent_thought.observation = observation + agent_thought.observation = observation if answer: agent_thought.answer = answer if messages_ids is not None and len(messages_ids) > 0: - updated_agent_thought.message_files = json.dumps(messages_ids) + agent_thought.message_files = json.dumps(messages_ids) if llm_usage: - updated_agent_thought.message_token = llm_usage.prompt_tokens - updated_agent_thought.message_price_unit = llm_usage.prompt_price_unit - updated_agent_thought.message_unit_price = llm_usage.prompt_unit_price - updated_agent_thought.answer_token = llm_usage.completion_tokens - updated_agent_thought.answer_price_unit = llm_usage.completion_price_unit - updated_agent_thought.answer_unit_price = llm_usage.completion_unit_price - updated_agent_thought.tokens = llm_usage.total_tokens - updated_agent_thought.total_price = llm_usage.total_price + agent_thought.message_token = llm_usage.prompt_tokens + agent_thought.message_price_unit = llm_usage.prompt_price_unit + agent_thought.message_unit_price = llm_usage.prompt_unit_price + agent_thought.answer_token = llm_usage.completion_tokens + agent_thought.answer_price_unit = llm_usage.completion_price_unit + agent_thought.answer_unit_price = llm_usage.completion_unit_price + agent_thought.tokens = llm_usage.total_tokens + agent_thought.total_price = llm_usage.total_price # check if tool labels is not empty - labels = updated_agent_thought.tool_labels or {} - tools = 
updated_agent_thought.tool.split(";") if updated_agent_thought.tool else [] + labels = agent_thought.tool_labels or {} + tools = agent_thought.tool.split(";") if agent_thought.tool else [] for tool in tools: if not tool: continue @@ -395,7 +391,7 @@ class BaseAgentRunner(AppRunner): else: labels[tool] = {"en_US": tool, "zh_Hans": tool} - updated_agent_thought.tool_labels_str = json.dumps(labels) + agent_thought.tool_labels_str = json.dumps(labels) if tool_invoke_meta is not None: if isinstance(tool_invoke_meta, dict): @@ -404,7 +400,7 @@ class BaseAgentRunner(AppRunner): except Exception: tool_invoke_meta = json.dumps(tool_invoke_meta) - updated_agent_thought.tool_meta_str = tool_invoke_meta + agent_thought.tool_meta_str = tool_invoke_meta db.session.commit() db.session.close() diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index 4979f63432..565fb42478 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -97,13 +97,13 @@ class CotAgentRunner(BaseAgentRunner, ABC): message_file_ids: list[str] = [] - agent_thought = self.create_agent_thought( + agent_thought_id = self.create_agent_thought( message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids ) if iteration_step > 1: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # recalc llm max tokens @@ -133,7 +133,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): # publish agent thought if it's first iteration if iteration_step == 1: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) for chunk in react_chunks: @@ -168,7 +168,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): usage_dict["usage"] = LLMUsage.empty_usage() self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""), tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {}, tool_invoke_meta={}, @@ -181,7 +181,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): if not scratchpad.is_final(): self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) if not scratchpad.action: @@ -212,7 +212,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): scratchpad.agent_response = tool_invoke_response self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=scratchpad.action.action_name, tool_input={scratchpad.action.action_name: scratchpad.action.action_input}, thought=scratchpad.thought or "", @@ -224,7 +224,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # update prompt tool message @@ -244,7 +244,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): # save agent thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name="", 
tool_input={}, tool_invoke_meta={}, diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index 5491689ece..4df71ce9de 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -80,7 +80,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): prompt_messages_tools = [] message_file_ids: list[str] = [] - agent_thought = self.create_agent_thought( + agent_thought_id = self.create_agent_thought( message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids ) @@ -114,7 +114,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): for chunk in chunks: if is_first_chunk: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) is_first_chunk = False # check if there is any tool call @@ -172,7 +172,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): result.message.content = "" self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) yield LLMResultChunk( @@ -205,7 +205,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): # save thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=tool_call_names, tool_input=tool_call_inputs, thought=response, @@ -216,7 +216,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): llm_usage=current_llm_usage, ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) final_answer += response + "\n" @@ -276,7 +276,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): if len(tool_responses) > 0: # save agent thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name="", tool_input="", thought="", @@ -291,7 +291,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): messages_ids=message_file_ids, ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # update prompt tool diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 610a5bb278..52ae20ee16 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -600,5 +600,5 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: - logger.exception(f"Failed to process generate task pipeline, conversation_id: {conversation.id}") + logger.exception("Failed to process generate task pipeline, conversation_id: %s", conversation.id) raise e diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index dc27076a4d..abb8db34de 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -271,7 +271,7 @@ class AdvancedChatAppGenerateTaskPipeline: start_listener_time = time.time() yield 
MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) except Exception: - logger.exception(f"Failed to listen audio message, task_id: {task_id}") + logger.exception("Failed to listen audio message, task_id: %s", task_id) break if tts_publisher: yield MessageAudioEndStreamResponse(audio="", task_id=task_id) diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index f5bc480f0a..11c979765b 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -78,7 +78,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: - logger.exception(f"Failed to handle response, conversation_id: {conversation.id}") + logger.exception("Failed to handle response, conversation_id: %s", conversation.id) raise e def _get_app_model_config(self, app_model: App, conversation: Optional[Conversation] = None) -> AppModelConfig: @@ -151,13 +151,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): introduction = self._get_conversation_introduction(application_generate_entity) # get conversation name - if isinstance(application_generate_entity, AdvancedChatAppGenerateEntity): - query = application_generate_entity.query or "New conversation" - else: - query = next(iter(application_generate_entity.inputs.values()), "New conversation") - if isinstance(query, int): - query = str(query) - query = query or "New conversation" + query = application_generate_entity.query or "New conversation" conversation_name = (query[:20] + "…") if len(query) > 20 else query if not conversation: diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 4c36f63c71..22b0234604 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -483,7 +483,7 @@ class WorkflowAppGenerator(BaseAppGenerator): try: runner.run() except GenerateTaskStoppedError as e: - logger.warning(f"Task stopped: {str(e)}") + logger.warning("Task stopped: %s", str(e)) pass except InvokeAuthorizationError: queue_manager.publish_error( @@ -540,6 +540,6 @@ class WorkflowAppGenerator(BaseAppGenerator): raise GenerateTaskStoppedError() else: logger.exception( - f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}" + "Fails to process generate task pipeline, task_id: %s", application_generate_entity.task_id ) raise e diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index e31a316c56..b1e9a340bd 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -246,7 +246,7 @@ class WorkflowAppGenerateTaskPipeline: else: yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) except Exception: - logger.exception(f"Fails to get audio trunk, task_id: {task_id}") + logger.exception("Fails to get audio trunk, task_id: %s", task_id) break if tts_publisher: yield MessageAudioEndStreamResponse(audio="", task_id=task_id) diff --git a/api/core/app/features/annotation_reply/annotation_reply.py b/api/core/app/features/annotation_reply/annotation_reply.py index 54dc69302a..b829340401 100644 --- a/api/core/app/features/annotation_reply/annotation_reply.py +++ b/api/core/app/features/annotation_reply/annotation_reply.py @@ -83,7 +83,7 @@ class 
AnnotationReplyFeature: return annotation except Exception as e: - logger.warning(f"Query annotation failed, exception: {str(e)}.") + logger.warning("Query annotation failed, exception: %s.", str(e)) return None return None diff --git a/api/core/app/task_pipeline/message_cycle_manager.py b/api/core/app/task_pipeline/message_cycle_manager.py index 824da0b934..f0e9425e3f 100644 --- a/api/core/app/task_pipeline/message_cycle_manager.py +++ b/api/core/app/task_pipeline/message_cycle_manager.py @@ -97,7 +97,7 @@ class MessageCycleManager: conversation.name = name except Exception as e: if dify_config.DEBUG: - logging.exception(f"generate conversation name failed, conversation_id: {conversation_id}") + logging.exception("generate conversation name failed, conversation_id: %s", conversation_id) pass db.session.merge(conversation) diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index af5c18e267..9aaa1f0b10 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -900,7 +900,7 @@ class ProviderConfiguration(BaseModel): credentials=copy_credentials, ) except Exception as ex: - logger.warning(f"get custom model schema failed, {ex}") + logger.warning("get custom model schema failed, %s", ex) continue if not custom_model_schema: @@ -1009,7 +1009,7 @@ class ProviderConfiguration(BaseModel): credentials=model_configuration.credentials, ) except Exception as ex: - logger.warning(f"get custom model schema failed, {ex}") + logger.warning("get custom model schema failed, %s", ex) continue if not custom_model_schema: diff --git a/api/core/extension/api_based_extension_requestor.py b/api/core/extension/api_based_extension_requestor.py index 3f4e20ec24..accccd8c40 100644 --- a/api/core/extension/api_based_extension_requestor.py +++ b/api/core/extension/api_based_extension_requestor.py @@ -22,7 +22,7 @@ class APIBasedExtensionRequestor: :param params: the request params :return: the response json """ - headers = {"Content-Type": "application/json", "Authorization": "Bearer {}".format(self.api_key)} + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} url = self.api_endpoint @@ -49,8 +49,6 @@ class APIBasedExtensionRequestor: raise ValueError("request connection error") if response.status_code != 200: - raise ValueError( - "request error, status_code: {}, content: {}".format(response.status_code, response.text[:100]) - ) + raise ValueError(f"request error, status_code: {response.status_code}, content: {response.text[:100]}") return cast(dict, response.json()) diff --git a/api/core/extension/extensible.py b/api/core/extension/extensible.py index 06fdb089d4..557f7eb1ed 100644 --- a/api/core/extension/extensible.py +++ b/api/core/extension/extensible.py @@ -66,7 +66,7 @@ class Extensible: # Check for extension module file if (extension_name + ".py") not in file_names: - logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.") + logging.warning("Missing %s.py file in %s, Skip.", extension_name, subdir_path) continue # Check for builtin flag and position @@ -95,7 +95,7 @@ class Extensible: break if not extension_class: - logging.warning(f"Missing subclass of {cls.__name__} in {module_name}, Skip.") + logging.warning("Missing subclass of %s in %s, Skip.", cls.__name__, module_name) continue # Load schema if not builtin @@ -103,7 +103,7 @@ class Extensible: if not builtin: json_path = os.path.join(subdir_path, "schema.json") if not 
os.path.exists(json_path): - logging.warning(f"Missing schema.json file in {subdir_path}, Skip.") + logging.warning("Missing schema.json file in %s, Skip.", subdir_path) continue with open(json_path, encoding="utf-8") as f: diff --git a/api/core/external_data_tool/api/api.py b/api/core/external_data_tool/api/api.py index 2099a9e34c..d81f372d40 100644 --- a/api/core/external_data_tool/api/api.py +++ b/api/core/external_data_tool/api/api.py @@ -49,7 +49,7 @@ class ApiExternalDataTool(ExternalDataTool): """ # get params from config if not self.config: - raise ValueError("config is required, config: {}".format(self.config)) + raise ValueError(f"config is required, config: {self.config}") api_based_extension_id = self.config.get("api_based_extension_id") assert api_based_extension_id is not None, "api_based_extension_id is required" @@ -74,7 +74,7 @@ class ApiExternalDataTool(ExternalDataTool): # request api requestor = APIBasedExtensionRequestor(api_endpoint=api_based_extension.api_endpoint, api_key=api_key) except Exception as e: - raise ValueError("[External data tool] API query failed, variable: {}, error: {}".format(self.variable, e)) + raise ValueError(f"[External data tool] API query failed, variable: {self.variable}, error: {e}") response_json = requestor.request( point=APIBasedExtensionPoint.APP_EXTERNAL_DATA_TOOL_QUERY, @@ -90,7 +90,7 @@ class ApiExternalDataTool(ExternalDataTool): if not isinstance(response_json["result"], str): raise ValueError( - "[External data tool] API query failed, variable: {}, error: result is not string".format(self.variable) + f"[External data tool] API query failed, variable: {self.variable}, error: result is not string" ) return response_json["result"] diff --git a/api/core/helper/moderation.py b/api/core/helper/moderation.py index a324ac2767..86bac4119a 100644 --- a/api/core/helper/moderation.py +++ b/api/core/helper/moderation.py @@ -55,7 +55,7 @@ def check_moderation(tenant_id: str, model_config: ModelConfigWithCredentialsEnt if moderation_result is True: return True except Exception: - logger.exception(f"Fails to check moderation, provider_name: {provider_name}") + logger.exception("Fails to check moderation, provider_name: %s", provider_name) raise InvokeBadRequestError("Rate limit exceeded, please try again later.") return False diff --git a/api/core/helper/module_import_helper.py b/api/core/helper/module_import_helper.py index 9a041667e4..251309fa2c 100644 --- a/api/core/helper/module_import_helper.py +++ b/api/core/helper/module_import_helper.py @@ -30,7 +30,7 @@ def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_laz spec.loader.exec_module(module) return module except Exception as e: - logging.exception(f"Failed to load module {module_name} from script file '{py_file_path!r}'") + logging.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path)) raise e diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index 11f245812e..329527633c 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -73,10 +73,12 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): if response.status_code not in STATUS_FORCELIST: return response else: - logging.warning(f"Received status code {response.status_code} for URL {url} which is in the force list") + logging.warning( + "Received status code %s for URL %s which is in the force list", response.status_code, url + ) except httpx.RequestError as e: - logging.warning(f"Request to URL 
{url} failed on attempt {retries + 1}: {e}") + logging.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e) if max_retries == 0: raise diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index fc5d0547fc..2387658bb6 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -84,14 +84,14 @@ class IndexingRunner: documents=documents, ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) db.session.commit() except ObjectDeletedError: - logging.warning("Document deleted, document id: {}".format(dataset_document.id)) + logging.warning("Document deleted, document id: %s", dataset_document.id) except Exception as e: logging.exception("consume document failed") dataset_document.indexing_status = "error" @@ -147,7 +147,7 @@ class IndexingRunner: index_processor=index_processor, dataset=dataset, dataset_document=dataset_document, documents=documents ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) @@ -222,7 +222,7 @@ class IndexingRunner: index_processor=index_processor, dataset=dataset, dataset_document=dataset_document, documents=documents ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) @@ -324,7 +324,8 @@ class IndexingRunner: except Exception: logging.exception( "Delete image_files failed while indexing_estimate, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) @@ -649,7 +650,7 @@ class IndexingRunner: @staticmethod def _check_document_paused_status(document_id: str): - indexing_cache_key = "document_{}_is_paused".format(document_id) + indexing_cache_key = f"document_{document_id}_is_paused" result = redis_client.get(indexing_cache_key) if result: raise DocumentIsPausedError() diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 331ac933c8..47e5a79160 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -125,16 +125,13 @@ class LLMGenerator: return questions @classmethod - def generate_rule_config( - cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool, rule_config_max_tokens: int = 512 - ) -> dict: + def generate_rule_config(cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool) -> dict: output_parser = RuleConfigGeneratorOutputParser() error = "" error_step = "" rule_config = {"prompt": "", "variables": [], "opening_statement": "", "error": ""} - model_parameters = {"max_tokens": rule_config_max_tokens, "temperature": 0.01} - + model_parameters = 
model_config.get("completion_params", {}) if no_variable: prompt_template = PromptTemplateParser(WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE) @@ -170,7 +167,7 @@ class LLMGenerator: error = str(e) error_step = "generate rule config" except Exception as e: - logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}") + logging.exception("Failed to generate rule config, model: %s", model_config.get("name")) rule_config["error"] = str(e) rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else "" @@ -267,7 +264,7 @@ class LLMGenerator: error_step = "generate conversation opener" except Exception as e: - logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}") + logging.exception("Failed to generate rule config, model: %s", model_config.get("name")) rule_config["error"] = str(e) rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else "" @@ -276,12 +273,7 @@ class LLMGenerator: @classmethod def generate_code( - cls, - tenant_id: str, - instruction: str, - model_config: dict, - code_language: str = "javascript", - max_tokens: int = 1000, + cls, tenant_id: str, instruction: str, model_config: dict, code_language: str = "javascript" ) -> dict: if code_language == "python": prompt_template = PromptTemplateParser(PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE) @@ -305,8 +297,7 @@ class LLMGenerator: ) prompt_messages = [UserPromptMessage(content=prompt)] - model_parameters = {"max_tokens": max_tokens, "temperature": 0.01} - + model_parameters = model_config.get("completion_params", {}) try: response = cast( LLMResult, @@ -323,7 +314,7 @@ class LLMGenerator: return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"} except Exception as e: logging.exception( - f"Failed to invoke LLM model, model: {model_config.get('name')}, language: {code_language}" + "Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language ) return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"} @@ -395,5 +386,5 @@ class LLMGenerator: error = str(e) return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"} except Exception as e: - logging.exception(f"Failed to invoke LLM model, model: {model_config.get('name')}") + logging.exception("Failed to invoke LLM model, model: %s", model_config.get("name")) return {"output": "", "error": f"An unexpected error occurred: {str(e)}"} diff --git a/api/core/mcp/client/sse_client.py b/api/core/mcp/client/sse_client.py index 91debcc8f9..4226e77f7e 100644 --- a/api/core/mcp/client/sse_client.py +++ b/api/core/mcp/client/sse_client.py @@ -88,7 +88,7 @@ class SSETransport: status_queue: Queue to put status updates. 
""" endpoint_url = urljoin(self.url, sse_data) - logger.info(f"Received endpoint URL: {endpoint_url}") + logger.info("Received endpoint URL: %s", endpoint_url) if not self._validate_endpoint_url(endpoint_url): error_msg = f"Endpoint origin does not match connection origin: {endpoint_url}" @@ -107,7 +107,7 @@ class SSETransport: """ try: message = types.JSONRPCMessage.model_validate_json(sse_data) - logger.debug(f"Received server message: {message}") + logger.debug("Received server message: %s", message) session_message = SessionMessage(message) read_queue.put(session_message) except Exception as exc: @@ -128,7 +128,7 @@ class SSETransport: case "message": self._handle_message_event(sse.data, read_queue) case _: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) def sse_reader(self, event_source, read_queue: ReadQueue, status_queue: StatusQueue) -> None: """Read and process SSE events. @@ -142,7 +142,7 @@ class SSETransport: for sse in event_source.iter_sse(): self._handle_sse_event(sse, read_queue, status_queue) except httpx.ReadError as exc: - logger.debug(f"SSE reader shutting down normally: {exc}") + logger.debug("SSE reader shutting down normally: %s", exc) except Exception as exc: read_queue.put(exc) finally: @@ -165,7 +165,7 @@ class SSETransport: ), ) response.raise_for_status() - logger.debug(f"Client message sent successfully: {response.status_code}") + logger.debug("Client message sent successfully: %s", response.status_code) def post_writer(self, client: httpx.Client, endpoint_url: str, write_queue: WriteQueue) -> None: """Handle writing messages to the server. @@ -190,7 +190,7 @@ class SSETransport: except queue.Empty: continue except httpx.ReadError as exc: - logger.debug(f"Post writer shutting down normally: {exc}") + logger.debug("Post writer shutting down normally: %s", exc) except Exception as exc: logger.exception("Error writing messages") write_queue.put(exc) @@ -326,7 +326,7 @@ def send_message(http_client: httpx.Client, endpoint_url: str, session_message: ), ) response.raise_for_status() - logger.debug(f"Client message sent successfully: {response.status_code}") + logger.debug("Client message sent successfully: %s", response.status_code) except Exception as exc: logger.exception("Error sending message") raise @@ -349,13 +349,13 @@ def read_messages( if sse.event == "message": try: message = types.JSONRPCMessage.model_validate_json(sse.data) - logger.debug(f"Received server message: {message}") + logger.debug("Received server message: %s", message) yield SessionMessage(message) except Exception as exc: logger.exception("Error parsing server message") yield exc else: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) except Exception as exc: logger.exception("Error reading SSE messages") yield exc diff --git a/api/core/mcp/client/streamable_client.py b/api/core/mcp/client/streamable_client.py index fbd8d05f9e..ca414ebb93 100644 --- a/api/core/mcp/client/streamable_client.py +++ b/api/core/mcp/client/streamable_client.py @@ -129,7 +129,7 @@ class StreamableHTTPTransport: new_session_id = response.headers.get(MCP_SESSION_ID) if new_session_id: self.session_id = new_session_id - logger.info(f"Received session ID: {self.session_id}") + logger.info("Received session ID: %s", self.session_id) def _handle_sse_event( self, @@ -142,7 +142,7 @@ class StreamableHTTPTransport: if sse.event == "message": try: message = JSONRPCMessage.model_validate_json(sse.data) - 
logger.debug(f"SSE message: {message}") + logger.debug("SSE message: %s", message) # If this is a response and we have original_request_id, replace it if original_request_id is not None and isinstance(message.root, JSONRPCResponse | JSONRPCError): @@ -168,7 +168,7 @@ class StreamableHTTPTransport: logger.debug("Received ping event") return False else: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) return False def handle_get_stream( @@ -197,7 +197,7 @@ class StreamableHTTPTransport: self._handle_sse_event(sse, server_to_client_queue) except Exception as exc: - logger.debug(f"GET stream error (non-fatal): {exc}") + logger.debug("GET stream error (non-fatal): %s", exc) def _handle_resumption_request(self, ctx: RequestContext) -> None: """Handle a resumption request using GET with SSE.""" @@ -352,7 +352,7 @@ class StreamableHTTPTransport: # Check if this is a resumption request is_resumption = bool(metadata and metadata.resumption_token) - logger.debug(f"Sending client message: {message}") + logger.debug("Sending client message: %s", message) # Handle initialized notification if self._is_initialized_notification(message): @@ -389,9 +389,9 @@ class StreamableHTTPTransport: if response.status_code == 405: logger.debug("Server does not allow session termination") elif response.status_code != 200: - logger.warning(f"Session termination failed: {response.status_code}") + logger.warning("Session termination failed: %s", response.status_code) except Exception as exc: - logger.warning(f"Session termination failed: {exc}") + logger.warning("Session termination failed: %s", exc) def get_session_id(self) -> str | None: """Get the current session ID.""" diff --git a/api/core/mcp/mcp_client.py b/api/core/mcp/mcp_client.py index 5fe52c008a..875d13de05 100644 --- a/api/core/mcp/mcp_client.py +++ b/api/core/mcp/mcp_client.py @@ -75,7 +75,7 @@ class MCPClient: self.connect_server(client_factory, method_name) else: try: - logger.debug(f"Not supported method {method_name} found in URL path, trying default 'mcp' method.") + logger.debug("Not supported method %s found in URL path, trying default 'mcp' method.", method_name) self.connect_server(sse_client, "sse") except MCPConnectionError: logger.debug("MCP connection failed with 'sse', falling back to 'mcp' method.") diff --git a/api/core/mcp/session/base_session.py b/api/core/mcp/session/base_session.py index 7734b8fdd9..3b6c9a7424 100644 --- a/api/core/mcp/session/base_session.py +++ b/api/core/mcp/session/base_session.py @@ -368,7 +368,7 @@ class BaseSession( self._handle_incoming(notification) except Exception as e: # For other validation errors, log and continue - logging.warning(f"Failed to validate notification: {e}. Message was: {message.message.root}") + logging.warning("Failed to validate notification: %s. 
Message was: %s", e, message.message.root) else: # Response or error response_queue = self._response_streams.get(message.message.root.id) if response_queue is not None: diff --git a/api/core/model_manager.py b/api/core/model_manager.py index 4886ffe244..51af3d1877 100644 --- a/api/core/model_manager.py +++ b/api/core/model_manager.py @@ -535,9 +535,19 @@ class LBModelManager: if dify_config.DEBUG: logger.info( - f"Model LB\nid: {config.id}\nname:{config.name}\n" - f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n" - f"model_type: {self._model_type.value}\nmodel: {self._model}" + """Model LB +id: %s +name:%s +tenant_id: %s +provider: %s +model_type: %s +model: %s""", + config.id, + config.name, + self._tenant_id, + self._provider, + self._model_type.value, + self._model, ) return config diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py index e2cc576f83..ce378b443d 100644 --- a/api/core/model_runtime/model_providers/__base/large_language_model.py +++ b/api/core/model_runtime/model_providers/__base/large_language_model.py @@ -440,7 +440,9 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_before_invoke failed with error {e}") + logger.warning( + "Callback %s on_before_invoke failed with error %s", callback.__class__.__name__, e + ) def _trigger_new_chunk_callbacks( self, @@ -487,7 +489,7 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_new_chunk failed with error {e}") + logger.warning("Callback %s on_new_chunk failed with error %s", callback.__class__.__name__, e) def _trigger_after_invoke_callbacks( self, @@ -535,7 +537,9 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_after_invoke failed with error {e}") + logger.warning( + "Callback %s on_after_invoke failed with error %s", callback.__class__.__name__, e + ) def _trigger_invoke_error_callbacks( self, @@ -583,4 +587,6 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_invoke_error failed with error {e}") + logger.warning( + "Callback %s on_invoke_error failed with error %s", callback.__class__.__name__, e + ) diff --git a/api/core/moderation/output_moderation.py b/api/core/moderation/output_moderation.py index 2ec315417f..b39db4b7ff 100644 --- a/api/core/moderation/output_moderation.py +++ b/api/core/moderation/output_moderation.py @@ -136,6 +136,6 @@ class OutputModeration(BaseModel): result: ModerationOutputsResult = moderation_factory.moderation_for_outputs(moderation_buffer) return result except Exception as e: - logger.exception(f"Moderation Output error, app_id: {app_id}") + logger.exception("Moderation Output error, app_id: %s", app_id) return None diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py index cf367efdf0..af0e38f7ef 100644 --- a/api/core/ops/aliyun_trace/aliyun_trace.py +++ b/api/core/ops/aliyun_trace/aliyun_trace.py @@ -97,7 +97,7 @@ class AliyunDataTrace(BaseTraceInstance): try: return self.trace_client.get_project_url() except Exception as e: - logger.info(f"Aliyun get run url failed: {str(e)}", exc_info=True) + logger.info("Aliyun get run url failed: %s", str(e), exc_info=True) raise ValueError(f"Aliyun get run 
url failed: {str(e)}") def workflow_trace(self, trace_info: WorkflowTraceInfo): @@ -139,7 +139,7 @@ class AliyunDataTrace(BaseTraceInstance): start_time=convert_datetime_to_nanoseconds(trace_info.start_time), end_time=convert_datetime_to_nanoseconds(trace_info.end_time), attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value, GEN_AI_FRAMEWORK: "dify", @@ -161,12 +161,12 @@ class AliyunDataTrace(BaseTraceInstance): start_time=convert_datetime_to_nanoseconds(trace_info.start_time), end_time=convert_datetime_to_nanoseconds(trace_info.end_time), attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name", ""), - GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider", ""), + GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "", + GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "", GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens), GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens), GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens), @@ -286,7 +286,7 @@ class AliyunDataTrace(BaseTraceInstance): node_span = self.build_workflow_task_span(trace_id, workflow_span_id, trace_info, node_execution) return node_span except Exception as e: - logging.debug(f"Error occurred in build_workflow_node_span: {e}", exc_info=True) + logging.debug("Error occurred in build_workflow_node_span: %s", e, exc_info=True) return None def get_workflow_node_status(self, node_execution: WorkflowNodeExecution) -> Status: @@ -386,14 +386,14 @@ class AliyunDataTrace(BaseTraceInstance): GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: process_data.get("model_name", ""), - GEN_AI_SYSTEM: process_data.get("model_provider", ""), + GEN_AI_MODEL_NAME: process_data.get("model_name") or "", + GEN_AI_SYSTEM: process_data.get("model_provider") or "", GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False), GEN_AI_COMPLETION: str(outputs.get("text", "")), - GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason", ""), + GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "", INPUT_VALUE: json.dumps(process_data.get("prompts", []), ensure_ascii=False), OUTPUT_VALUE: str(outputs.get("text", "")), }, @@ -421,7 +421,7 @@ class AliyunDataTrace(BaseTraceInstance): GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value, GEN_AI_FRAMEWORK: "dify", - INPUT_VALUE: trace_info.workflow_run_inputs.get("sys.query", ""), + INPUT_VALUE: trace_info.workflow_run_inputs.get("sys.query") or "", OUTPUT_VALUE: json.dumps(trace_info.workflow_run_outputs, ensure_ascii=False), }, status=status, @@ -461,8 +461,8 @@ class AliyunDataTrace(BaseTraceInstance): attributes={ GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name", ""), - GEN_AI_SYSTEM: 
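The aliyun_trace hunks above also swap `metadata.get(key, "")` for `metadata.get(key) or ""`. These are not equivalent: the default in `dict.get` only applies when the key is missing, so a key that is present but explicitly set to None still comes back as None, while `or ""` coerces that case to an empty string. A small illustration with made-up data:

```python
metadata = {"conversation_id": None}  # key present, but explicitly None

print(metadata.get("conversation_id", ""))    # None  (the default is not used)
print(metadata.get("conversation_id") or "")  # ''    (None is coerced to "")

# Caveat: `or` also replaces other falsy values such as 0 or "", which is fine
# for string-valued trace attributes but would be wrong for numeric fields.
```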
trace_info.metadata.get("ls_provider", ""), + GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "", + GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "", GEN_AI_PROMPT: json.dumps(trace_info.inputs, ensure_ascii=False), GEN_AI_COMPLETION: json.dumps(trace_info.suggested_question, ensure_ascii=False), INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False), diff --git a/api/core/ops/aliyun_trace/data_exporter/traceclient.py b/api/core/ops/aliyun_trace/data_exporter/traceclient.py index ba5ac3f420..934ce95a64 100644 --- a/api/core/ops/aliyun_trace/data_exporter/traceclient.py +++ b/api/core/ops/aliyun_trace/data_exporter/traceclient.py @@ -69,10 +69,10 @@ class TraceClient: if response.status_code == 405: return True else: - logger.debug(f"AliyunTrace API check failed: Unexpected status code: {response.status_code}") + logger.debug("AliyunTrace API check failed: Unexpected status code: %s", response.status_code) return False except requests.exceptions.RequestException as e: - logger.debug(f"AliyunTrace API check failed: {str(e)}") + logger.debug("AliyunTrace API check failed: %s", str(e)) raise ValueError(f"AliyunTrace API check failed: {str(e)}") def get_project_url(self): @@ -109,7 +109,7 @@ class TraceClient: try: self.exporter.export(spans_to_export) except Exception as e: - logger.debug(f"Error exporting spans: {e}") + logger.debug("Error exporting spans: %s", e) def shutdown(self): with self.condition: diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 1b72a4775a..f252a022d8 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -77,10 +77,10 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra # Create a named tracer instead of setting the global provider tracer_name = f"arize_phoenix_tracer_{arize_phoenix_config.project}" - logger.info(f"[Arize/Phoenix] Created tracer with name: {tracer_name}") + logger.info("[Arize/Phoenix] Created tracer with name: %s", tracer_name) return cast(trace_sdk.Tracer, provider.get_tracer(tracer_name)), processor except Exception as e: - logger.error(f"[Arize/Phoenix] Failed to setup the tracer: {str(e)}", exc_info=True) + logger.error("[Arize/Phoenix] Failed to setup the tracer: %s", str(e), exc_info=True) raise @@ -120,7 +120,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") def trace(self, trace_info: BaseTraceInfo): - logger.info(f"[Arize/Phoenix] Trace: {trace_info}") + logger.info("[Arize/Phoenix] Trace: %s", trace_info) try: if isinstance(trace_info, WorkflowTraceInfo): self.workflow_trace(trace_info) @@ -138,7 +138,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) except Exception as e: - logger.error(f"[Arize/Phoenix] Error in the trace: {str(e)}", exc_info=True) + logger.error("[Arize/Phoenix] Error in the trace: %s", str(e), exc_info=True) raise def workflow_trace(self, trace_info: WorkflowTraceInfo): @@ -570,7 +570,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): trace_id = uuid_to_trace_id(trace_info.message_id) tool_span_id = RandomIdGenerator().generate_span_id() - logger.info(f"[Arize/Phoenix] Creating tool trace with trace_id: {trace_id}, span_id: {tool_span_id}") + logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id) # Create span context with the same 
trace_id as the parent # todo: Create with the appropriate parent span context, so that the tool span is @@ -673,7 +673,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): span.set_attribute("test", "true") return True except Exception as e: - logger.info(f"[Arize/Phoenix] API check failed: {str(e)}", exc_info=True) + logger.info("[Arize/Phoenix] API check failed: %s", str(e), exc_info=True) raise ValueError(f"[Arize/Phoenix] API check failed: {str(e)}") def get_project_url(self): @@ -683,7 +683,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): else: return f"{self.arize_phoenix_config.endpoint}/projects/" except Exception as e: - logger.info(f"[Arize/Phoenix] Get run url failed: {str(e)}", exc_info=True) + logger.info("[Arize/Phoenix] Get run url failed: %s", str(e), exc_info=True) raise ValueError(f"[Arize/Phoenix] Get run url failed: {str(e)}") def _get_workflow_nodes(self, workflow_run_id: str): diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index f4a59ef3a7..d356e735ee 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -440,7 +440,7 @@ class LangFuseDataTrace(BaseTraceInstance): try: return self.langfuse_client.auth_check() except Exception as e: - logger.debug(f"LangFuse API check failed: {str(e)}") + logger.debug("LangFuse API check failed: %s", str(e)) raise ValueError(f"LangFuse API check failed: {str(e)}") def get_project_key(self): @@ -448,5 +448,5 @@ class LangFuseDataTrace(BaseTraceInstance): projects = self.langfuse_client.client.projects.get() return projects.data[0].id except Exception as e: - logger.debug(f"LangFuse get project key failed: {str(e)}") + logger.debug("LangFuse get project key failed: %s", str(e)) raise ValueError(f"LangFuse get project key failed: {str(e)}") diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index c97846dc9b..fb3f6ecf0d 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -504,7 +504,7 @@ class LangSmithDataTrace(BaseTraceInstance): self.langsmith_client.delete_project(project_name=random_project_name) return True except Exception as e: - logger.debug(f"LangSmith API check failed: {str(e)}") + logger.debug("LangSmith API check failed: %s", str(e)) raise ValueError(f"LangSmith API check failed: {str(e)}") def get_project_url(self): @@ -523,5 +523,5 @@ class LangSmithDataTrace(BaseTraceInstance): ) return project_url.split("/r/")[0] except Exception as e: - logger.debug(f"LangSmith get run url failed: {str(e)}") + logger.debug("LangSmith get run url failed: %s", str(e)) raise ValueError(f"LangSmith get run url failed: {str(e)}") diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index 6079b2faef..1e52f28350 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -453,12 +453,12 @@ class OpikDataTrace(BaseTraceInstance): self.opik_client.auth_check() return True except Exception as e: - logger.info(f"Opik API check failed: {str(e)}", exc_info=True) + logger.info("Opik API check failed: %s", str(e), exc_info=True) raise ValueError(f"Opik API check failed: {str(e)}") def get_project_url(self): try: return self.opik_client.get_project_url(project_name=self.project) except Exception as e: - logger.info(f"Opik get run url failed: {str(e)}", exc_info=True) + logger.info("Opik get run url failed: %s", str(e), exc_info=True) 
raise ValueError(f"Opik get run url failed: {str(e)}") diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 2b546b47cc..91cdc937a6 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -287,7 +287,7 @@ class OpsTraceManager: # create new tracing_instance and update the cache if it absent tracing_instance = trace_instance(config_class(**decrypt_trace_config)) cls.ops_trace_instances_cache[decrypt_trace_config_key] = tracing_instance - logging.info(f"new tracing_instance for app_id: {app_id}") + logging.info("new tracing_instance for app_id: %s", app_id) return tracing_instance @classmethod @@ -843,7 +843,7 @@ class TraceQueueManager: trace_task.app_id = self.app_id trace_manager_queue.put(trace_task) except Exception as e: - logging.exception(f"Error adding trace task, trace_type {trace_task.trace_type}") + logging.exception("Error adding trace task, trace_type %s", trace_task.trace_type) finally: self.start_timer() diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index a34b3b780c..470601b17a 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -66,11 +66,11 @@ class WeaveDataTrace(BaseTraceInstance): project_url = f"https://wandb.ai/{self.weave_client._project_id()}" return project_url except Exception as e: - logger.debug(f"Weave get run url failed: {str(e)}") + logger.debug("Weave get run url failed: %s", str(e)) raise ValueError(f"Weave get run url failed: {str(e)}") def trace(self, trace_info: BaseTraceInfo): - logger.debug(f"Trace info: {trace_info}") + logger.debug("Trace info: %s", trace_info) if isinstance(trace_info, WorkflowTraceInfo): self.workflow_trace(trace_info) if isinstance(trace_info, MessageTraceInfo): @@ -403,7 +403,7 @@ class WeaveDataTrace(BaseTraceInstance): print("Weave login successful") return True except Exception as e: - logger.debug(f"Weave API check failed: {str(e)}") + logger.debug("Weave API check failed: %s", str(e)) raise ValueError(f"Weave API check failed: {str(e)}") def start_call(self, run_data: WeaveTraceModel, parent_run_id: Optional[str] = None): diff --git a/api/core/plugin/impl/exc.py b/api/core/plugin/impl/exc.py index 54a0b90a8d..8b660c807d 100644 --- a/api/core/plugin/impl/exc.py +++ b/api/core/plugin/impl/exc.py @@ -1,3 +1,8 @@ +from collections.abc import Mapping + +from pydantic import TypeAdapter + + class PluginDaemonError(Exception): """Base class for all plugin daemon errors.""" @@ -36,6 +41,21 @@ class PluginDaemonBadRequestError(PluginDaemonClientSideError): class PluginInvokeError(PluginDaemonClientSideError): description: str = "Invoke Error" + def _get_error_object(self) -> Mapping: + try: + return TypeAdapter(Mapping).validate_json(self.description) + except Exception: + return {} + + def get_error_type(self) -> str: + return self._get_error_object().get("error_type", "unknown") + + def get_error_message(self) -> str: + try: + return self._get_error_object().get("message", "unknown") + except Exception: + return self.description + class PluginUniqueIdentifierError(PluginDaemonClientSideError): description: str = "Unique Identifier Error" diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index ec3a23bd96..7c5f47006f 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -24,7 +24,7 @@ class Jieba(BaseKeyword): self._config = KeywordTableConfig() def 
create(self, texts: list[Document], **kwargs) -> BaseKeyword: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table_handler = JiebaKeywordTableHandler() keyword_table = self._get_dataset_keyword_table() @@ -43,7 +43,7 @@ class Jieba(BaseKeyword): return self def add_texts(self, texts: list[Document], **kwargs): - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table_handler = JiebaKeywordTableHandler() @@ -76,7 +76,7 @@ class Jieba(BaseKeyword): return id in set.union(*keyword_table.values()) def delete_by_ids(self, ids: list[str]) -> None: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table = self._get_dataset_keyword_table() if keyword_table is not None: @@ -116,7 +116,7 @@ class Jieba(BaseKeyword): return documents def delete(self) -> None: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): dataset_keyword_table = self.dataset.dataset_keyword_table if dataset_keyword_table: diff --git a/api/core/rag/datasource/vdb/baidu/baidu_vector.py b/api/core/rag/datasource/vdb/baidu/baidu_vector.py index db7ffc9c4f..d63ca9f695 100644 --- a/api/core/rag/datasource/vdb/baidu/baidu_vector.py +++ b/api/core/rag/datasource/vdb/baidu/baidu_vector.py @@ -203,9 +203,9 @@ class BaiduVector(BaseVector): def _create_table(self, dimension: int) -> None: # Try to grab distributed lock and create table - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=60): - table_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + table_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(table_exist_cache_key): return diff --git a/api/core/rag/datasource/vdb/chroma/chroma_vector.py b/api/core/rag/datasource/vdb/chroma/chroma_vector.py index b8b265d5e6..699a602365 100644 --- a/api/core/rag/datasource/vdb/chroma/chroma_vector.py +++ b/api/core/rag/datasource/vdb/chroma/chroma_vector.py @@ -57,9 +57,9 @@ class ChromaVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return self._client.get_or_create_collection(collection_name) diff --git a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py index 68a9952789..bd986393d1 100644 --- a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py +++ b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py @@ -74,9 +74,9 @@ class CouchbaseVector(BaseVector): self.add_texts(texts, embeddings) def _create_collection(self, vector_length: int, uuid: str): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + 
lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return if self._collection_exists(self._collection_name): @@ -242,7 +242,7 @@ class CouchbaseVector(BaseVector): try: self._cluster.query(query, named_parameters={"doc_ids": ids}).execute() except Exception as e: - logger.exception(f"Failed to delete documents, ids: {ids}") + logger.exception("Failed to delete documents, ids: %s", ids) def delete_by_document_id(self, document_id: str): query = f""" diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py index 27575197fa..7118029d40 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py @@ -29,7 +29,7 @@ class ElasticSearchJaVector(ElasticSearchVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py index ad39717183..832485b236 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py @@ -186,7 +186,7 @@ class ElasticSearchVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py b/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py index 89423eb160..0a4067e39c 100644 --- a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py +++ b/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py @@ -164,7 +164,7 @@ class HuaweiCloudVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py index e9ff1ce43d..3c65a41f08 100644 --- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py +++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py @@ -89,7 +89,7 @@ class LindormVectorStore(BaseVector): timeout: int = 60, **kwargs, ): - logger.info(f"Total documents to add: {len(documents)}") + logger.info("Total documents to add: %s", len(documents)) uuids = self._get_uuids(documents) total_docs = len(documents) @@ -147,7 
+147,7 @@ class LindormVectorStore(BaseVector): time.sleep(0.5) except Exception: - logger.exception(f"Failed to process batch {batch_num + 1}") + logger.exception("Failed to process batch %s", batch_num + 1) raise def get_ids_by_metadata_field(self, key: str, value: str): @@ -180,7 +180,7 @@ class LindormVectorStore(BaseVector): # 1. First check if collection exists if not self._client.indices.exists(index=self._collection_name): - logger.warning(f"Collection {self._collection_name} does not exist") + logger.warning("Collection %s does not exist", self._collection_name) return # 2. Batch process deletions @@ -196,7 +196,7 @@ class LindormVectorStore(BaseVector): } ) else: - logger.warning(f"DELETE BY ID: ID {id} does not exist in the index.") + logger.warning("DELETE BY ID: ID %s does not exist in the index.", id) # 3. Perform bulk deletion if there are valid documents to delete if actions: @@ -209,9 +209,9 @@ class LindormVectorStore(BaseVector): doc_id = delete_error.get("_id") if status == 404: - logger.warning(f"Document not found for deletion: {doc_id}") + logger.warning("Document not found for deletion: %s", doc_id) else: - logger.exception(f"Error deleting document: {error}") + logger.exception("Error deleting document: %s", error) def delete(self) -> None: if self._using_ugc: @@ -225,7 +225,7 @@ class LindormVectorStore(BaseVector): self._client.indices.delete(index=self._collection_name, params={"timeout": 60}) logger.info("Delete index success") else: - logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.") + logger.warning("Index '%s' does not exist. No deletion performed.", self._collection_name) def text_exists(self, id: str) -> bool: try: @@ -257,7 +257,7 @@ class LindormVectorStore(BaseVector): params["routing"] = self._routing # type: ignore response = self._client.search(index=self._collection_name, body=query, params=params) except Exception: - logger.exception(f"Error executing vector search, query: {query}") + logger.exception("Error executing vector search, query: %s", query) raise docs_and_scores = [] @@ -324,10 +324,10 @@ class LindormVectorStore(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if self._client.indices.exists(index=self._collection_name): - logger.info(f"{self._collection_name.lower()} already exists.") + logger.info("%s already exists.", self._collection_name.lower()) redis_client.set(collection_exist_cache_key, 1, ex=3600) return if len(self.kwargs) == 0 and len(kwargs) != 0: diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 63de6a0603..d64f366e0e 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -103,7 +103,7 @@ class MilvusVector(BaseVector): # For standard Milvus installations, check version number return version.parse(milvus_version).base_version >= version.parse("2.5.0").base_version except Exception as e: - logger.warning(f"Failed to check Milvus version: {str(e)}. Disabling hybrid search.") + logger.warning("Failed to check Milvus version: %s. 
Disabling hybrid search.", str(e)) return False def get_type(self) -> str: @@ -289,9 +289,9 @@ class MilvusVector(BaseVector): """ Create a new collection in Milvus with the specified schema and index parameters. """ - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return # Grab the existing collection if it exists diff --git a/api/core/rag/datasource/vdb/myscale/myscale_vector.py b/api/core/rag/datasource/vdb/myscale/myscale_vector.py index dbb1a7fe19..d5ec4b4436 100644 --- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py +++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py @@ -53,7 +53,7 @@ class MyScaleVector(BaseVector): return self.add_texts(documents=texts, embeddings=embeddings, **kwargs) def _create_collection(self, dimension: int): - logging.info(f"create MyScale collection {self._collection_name} with dimension {dimension}") + logging.info("create MyScale collection %s with dimension %s", self._collection_name, dimension) self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}") fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else "" sql = f""" @@ -151,7 +151,7 @@ class MyScaleVector(BaseVector): for r in self._client.query(sql).named_results() ] except Exception as e: - logging.exception(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") # noqa:TRY401 + logging.exception("\033[91m\033[1m%s\033[0m \033[95m%s\033[0m", type(e), str(e)) # noqa:TRY401 return [] def delete(self) -> None: diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index dd196e1f09..d6dfe967d7 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -147,7 +147,7 @@ class OceanBaseVector(BaseVector): logger.debug("Current OceanBase version is %s", ob_version) return version.parse(ob_version).base_version >= version.parse("4.3.5.1").base_version except Exception as e: - logger.warning(f"Failed to check OceanBase version: {str(e)}. Disabling hybrid search.") + logger.warning("Failed to check OceanBase version: %s. 
Disabling hybrid search.", str(e)) return False def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): @@ -229,7 +229,7 @@ class OceanBaseVector(BaseVector): return docs except Exception as e: - logger.warning(f"Failed to fulltext search: {str(e)}.") + logger.warning("Failed to fulltext search: %s.", str(e)) return [] def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py index 0abb3c0077..ed2dcb40ad 100644 --- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py +++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py @@ -131,7 +131,7 @@ class OpenSearchVector(BaseVector): def delete_by_ids(self, ids: list[str]) -> None: index_name = self._collection_name.lower() if not self._client.indices.exists(index=index_name): - logger.warning(f"Index {index_name} does not exist") + logger.warning("Index %s does not exist", index_name) return # Obtaining All Actual Documents_ID @@ -142,7 +142,7 @@ class OpenSearchVector(BaseVector): if es_ids: actual_ids.extend(es_ids) else: - logger.warning(f"Document with metadata doc_id {doc_id} not found for deletion") + logger.warning("Document with metadata doc_id %s not found for deletion", doc_id) if actual_ids: actions = [{"_op_type": "delete", "_index": index_name, "_id": es_id} for es_id in actual_ids] @@ -155,9 +155,9 @@ class OpenSearchVector(BaseVector): doc_id = delete_error.get("_id") if status == 404: - logger.warning(f"Document not found for deletion: {doc_id}") + logger.warning("Document not found for deletion: %s", doc_id) else: - logger.exception(f"Error deleting document: {error}") + logger.exception("Error deleting document: %s", error) def delete(self) -> None: self._client.indices.delete(index=self._collection_name.lower()) @@ -198,7 +198,7 @@ class OpenSearchVector(BaseVector): try: response = self._client.search(index=self._collection_name.lower(), body=query) except Exception as e: - logger.exception(f"Error executing vector search, query: {query}") + logger.exception("Error executing vector search, query: %s", query) raise docs = [] @@ -242,7 +242,7 @@ class OpenSearchVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name.lower()}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name.lower()} already exists.") + logger.info("Collection %s already exists.", self._collection_name.lower()) return if not self._client.indices.exists(index=self._collection_name.lower()): @@ -272,7 +272,7 @@ class OpenSearchVector(BaseVector): }, } - logger.info(f"Creating OpenSearch index {self._collection_name.lower()}") + logger.info("Creating OpenSearch index %s", self._collection_name.lower()) self._client.indices.create(index=self._collection_name.lower(), body=index_body) redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py index b0f0eeca38..e77befcdae 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py @@ -82,9 +82,9 @@ class PGVectoRS(BaseVector): self.add_texts(texts, embeddings) def create_collection(self, dimension: int): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = 
f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return index_name = f"{self._collection_name}_embedding_index" diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 04e9cf801e..746773da63 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -155,7 +155,7 @@ class PGVector(BaseVector): cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),)) except psycopg2.errors.UndefinedTable: # table not exists - logging.warning(f"Table {self.table_name} not found, skipping delete operation.") + logging.warning("Table %s not found, skipping delete operation.", self.table_name) return except Exception as e: raise e diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py index dfb95a1839..9741dd8b1d 100644 --- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py +++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py @@ -95,9 +95,9 @@ class QdrantVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str, vector_size: int): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return collection_name = collection_name or uuid.uuid4().hex diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/core/rag/datasource/vdb/relyt/relyt_vector.py index 0c0d6a463d..7a42dd1a89 100644 --- a/api/core/rag/datasource/vdb/relyt/relyt_vector.py +++ b/api/core/rag/datasource/vdb/relyt/relyt_vector.py @@ -70,9 +70,9 @@ class RelytVector(BaseVector): self.add_texts(texts, embeddings) def create_collection(self, dimension: int): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return index_name = f"{self._collection_name}_embedding_index" diff --git a/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py b/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py index 9ed6e7369b..784e27fc7f 100644 --- a/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py +++ b/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py @@ -142,7 +142,7 @@ class TableStoreVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logging.info(f"Collection {self._collection_name} already exists.") + logging.info("Collection %s already exists.", self._collection_name) return self._create_table_if_not_exist() diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index 
23ed8a3344..3aa4b67a78 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -92,9 +92,9 @@ class TencentVector(BaseVector): def _create_collection(self, dimension: int) -> None: self._dimension = dimension - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py index ba6a9654f0..e848b39c4d 100644 --- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py +++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py @@ -104,9 +104,9 @@ class TidbOnQdrantVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str, vector_size: int): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return collection_name = collection_name or uuid.uuid4().hex diff --git a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py index 61c68b939e..f8a851a246 100644 --- a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py +++ b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py @@ -91,9 +91,9 @@ class TiDBVector(BaseVector): def _create_collection(self, dimension: int): logger.info("_create_collection, collection_name " + self._collection_name) - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return tidb_dist_func = self._get_distance_func() @@ -192,7 +192,7 @@ class TiDBVector(BaseVector): query_vector_str = ", ".join(format(x) for x in query_vector) query_vector_str = "[" + query_vector_str + "]" logger.debug( - f"_collection_name: {self._collection_name}, score_threshold: {score_threshold}, distance: {distance}" + "_collection_name: %s, score_threshold: %s, distance: %s", self._collection_name, score_threshold, distance ) docs = [] diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index e018f7d3d4..43c49ed4b3 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -178,19 +178,19 @@ class Vector: def create(self, texts: Optional[list] = None, **kwargs): if texts: start = time.time() - logger.info(f"start embedding {len(texts)} texts {start}") + logger.info("start embedding %s texts %s", len(texts), start) batch_size = 1000 total_batches = len(texts) + batch_size - 1 for i in range(0, len(texts), batch_size): batch = texts[i : i + batch_size] batch_start = 
time.time() - logger.info(f"Processing batch {i // batch_size + 1}/{total_batches} ({len(batch)} texts)") + logger.info("Processing batch %s/%s (%s texts)", i // batch_size + 1, total_batches, len(batch)) batch_embeddings = self._embeddings.embed_documents([document.page_content for document in batch]) logger.info( - f"Embedding batch {i // batch_size + 1}/{total_batches} took {time.time() - batch_start:.3f}s" + "Embedding batch %s/%s took %s s", i // batch_size + 1, total_batches, time.time() - batch_start ) self._vector_processor.create(texts=batch, embeddings=batch_embeddings, **kwargs) - logger.info(f"Embedding {len(texts)} texts took {time.time() - start:.3f}s") + logger.info("Embedding %s texts took %s s", len(texts), time.time() - start) def add_texts(self, documents: list[Document], **kwargs): if kwargs.get("duplicate_check", False): @@ -219,7 +219,7 @@ class Vector: self._vector_processor.delete() # delete collection redis cache if self._vector_processor.collection_name: - collection_exist_cache_key = "vector_indexing_{}".format(self._vector_processor.collection_name) + collection_exist_cache_key = f"vector_indexing_{self._vector_processor.collection_name}" redis_client.delete(collection_exist_cache_key) def _get_embeddings(self) -> Embeddings: diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 7a8efb4068..5525ef1685 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -92,9 +92,9 @@ class WeaviateVector(BaseVector): self.add_texts(texts, embeddings) def _create_collection(self): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return schema = self._default_schema(self._collection_name) diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index f844770a20..f8da3657fc 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -32,7 +32,7 @@ class DatasetDocumentStore: } @property - def dateset_id(self) -> Any: + def dataset_id(self) -> Any: return self._dataset.id @property @@ -123,13 +123,13 @@ class DatasetDocumentStore: db.session.flush() if save_child: if doc.children: - for postion, child in enumerate(doc.children, start=1): + for position, child in enumerate(doc.children, start=1): child_segment = ChildChunk( tenant_id=self._dataset.tenant_id, dataset_id=self._dataset.id, document_id=self._document_id, segment_id=segment_document.id, - position=postion, + position=position, index_node_id=child.metadata.get("doc_id"), index_node_hash=child.metadata.get("doc_hash"), content=child.page_content, diff --git a/api/core/rag/embedding/cached_embedding.py b/api/core/rag/embedding/cached_embedding.py index f50f9f6b60..9848a28384 100644 --- a/api/core/rag/embedding/cached_embedding.py +++ b/api/core/rag/embedding/cached_embedding.py @@ -69,7 +69,7 @@ class CacheEmbedding(Embeddings): # stackoverflow best way: https://stackoverflow.com/questions/20319813/how-to-check-list-containing-nan if np.isnan(normalized_embedding).any(): # for issue #11827 float values are not json compliant - 
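One detail worth noting in the vector_factory timing logs above: rendering the elapsed time with %s drops the three-decimal formatting the old f-strings applied with :.3f. %-style logging supports precision specifiers directly, so the original output can be preserved if that is wanted:

```python
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

start = time.time()
time.sleep(0.01)  # stand-in for an embedding call

logger.info("Embedding batch took %s s", time.time() - start)    # raw float, e.g. 0.010134...
logger.info("Embedding batch took %.3f s", time.time() - start)  # keeps the 3-decimal output
```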
logger.warning(f"Normalized embedding is nan: {normalized_embedding}") + logger.warning("Normalized embedding is nan: %s", normalized_embedding) continue embedding_queue_embeddings.append(normalized_embedding) except IntegrityError: @@ -122,7 +122,7 @@ class CacheEmbedding(Embeddings): raise ValueError("Normalized embedding is nan please try again") except Exception as ex: if dify_config.DEBUG: - logging.exception(f"Failed to embed query text '{text[:10]}...({len(text)} chars)'") + logging.exception("Failed to embed query text '%s...(%s chars)'", text[:10], len(text)) raise ex try: @@ -136,7 +136,9 @@ class CacheEmbedding(Embeddings): redis_client.setex(embedding_cache_key, 600, encoded_str) except Exception as ex: if dify_config.DEBUG: - logging.exception(f"Failed to add embedding to redis for the text '{text[:10]}...({len(text)} chars)'") + logging.exception( + "Failed to add embedding to redis for the text '%s...(%s chars)'", text[:10], len(text) + ) raise ex return embedding_results # type: ignore diff --git a/api/core/rag/splitter/text_splitter.py b/api/core/rag/splitter/text_splitter.py index 529d8ccd27..489aa05430 100644 --- a/api/core/rag/splitter/text_splitter.py +++ b/api/core/rag/splitter/text_splitter.py @@ -116,7 +116,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size: if total > self._chunk_size: logger.warning( - f"Created a chunk of size {total}, which is longer than the specified {self._chunk_size}" + "Created a chunk of size %s, which is longer than the specified %s", total, self._chunk_size ) if len(current_doc) > 0: doc = self._join_docs(current_doc, separator) diff --git a/api/core/repositories/factory.py b/api/core/repositories/factory.py index 4118aa61c7..6e636883ae 100644 --- a/api/core/repositories/factory.py +++ b/api/core/repositories/factory.py @@ -153,7 +153,7 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_EXECUTION_REPOSITORY - logger.debug(f"Creating WorkflowExecutionRepository from: {class_path}") + logger.debug("Creating WorkflowExecutionRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) @@ -199,7 +199,7 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY - logger.debug(f"Creating WorkflowNodeExecutionRepository from: {class_path}") + logger.debug("Creating WorkflowNodeExecutionRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py index c579ff4028..74a49842f3 100644 --- a/api/core/repositories/sqlalchemy_workflow_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -203,5 +203,5 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): session.commit() # Update the in-memory cache for faster subsequent lookups - logger.debug(f"Updating cache for execution_id: {db_model.id}") + logger.debug("Updating cache for execution_id: %s", db_model.id) self._execution_cache[db_model.id] = db_model diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index d4a31390f8..f4532d7f29 
100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -215,7 +215,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) # Update the in-memory cache for faster subsequent lookups # Only cache if we have a node_execution_id to use as the cache key if db_model.node_execution_id: - logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}") + logger.debug("Updating cache for node_execution_id: %s", db_model.node_execution_id) self._node_execution_cache[db_model.node_execution_id] = db_model def get_db_models_by_workflow_run( diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index f286466de0..1bb4cfa4cd 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -206,7 +206,7 @@ class ToolManager: ) except Exception as e: builtin_provider = None - logger.info(f"Error getting builtin provider {credential_id}:{e}", exc_info=True) + logger.info("Error getting builtin provider %s:%s", credential_id, e, exc_info=True) # if the provider has been deleted, raise an error if builtin_provider is None: raise ToolProviderNotFoundError(f"provider has been deleted: {credential_id}") @@ -237,7 +237,7 @@ class ToolManager: if builtin_provider is None: raise ToolProviderNotFoundError(f"builtin provider {provider_id} not found") - encrypter, _ = create_provider_encrypter( + encrypter, cache = create_provider_encrypter( tenant_id=tenant_id, config=[ x.to_basic_provider_config() @@ -281,6 +281,7 @@ class ToolManager: builtin_provider.expires_at = refreshed_credentials.expires_at db.session.commit() decrypted_credentials = refreshed_credentials.credentials + cache.delete() return cast( BuiltinTool, @@ -569,7 +570,7 @@ class ToolManager: yield provider except Exception: - logger.exception(f"load builtin provider {provider_path}") + logger.exception("load builtin provider %s", provider_path) continue # set builtin providers loaded cls._builtin_providers_loaded = True @@ -1011,7 +1012,9 @@ class ToolManager: if variable is None: raise ToolParameterError(f"Variable {tool_input.value} does not exist") parameter_value = variable.value - elif tool_input.type in {"mixed", "constant"}: + elif tool_input.type == "constant": + parameter_value = tool_input.value + elif tool_input.type == "mixed": segment_group = variable_pool.convert_template(str(tool_input.value)) parameter_value = segment_group.text else: diff --git a/api/core/tools/utils/parser.py b/api/core/tools/utils/parser.py index a3c84615ca..3857a2a16b 100644 --- a/api/core/tools/utils/parser.py +++ b/api/core/tools/utils/parser.py @@ -105,6 +105,29 @@ class ApiBasedToolSchemaParser: # overwrite the content interface["operation"]["requestBody"]["content"][content_type]["schema"] = root + # handle allOf reference in schema properties + for prop_dict in root.get("properties", {}).values(): + for item in prop_dict.get("allOf", []): + if "$ref" in item: + ref_schema = openapi + reference = item["$ref"].split("/")[1:] + for ref in reference: + ref_schema = ref_schema[ref] + else: + ref_schema = item + for key, value in ref_schema.items(): + if isinstance(value, list): + if key not in prop_dict: + prop_dict[key] = [] + # extends list field + if isinstance(prop_dict[key], list): + prop_dict[key].extend(value) + elif key not in prop_dict: + # add new field + prop_dict[key] = value + if "allOf" in prop_dict: + del prop_dict["allOf"] + # parse body parameters if 
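The parser.py addition above inlines `allOf` members into each request-body property: `$ref` entries are resolved against the full OpenAPI document, list fields are concatenated, and scalar fields are only filled in when the property does not define them itself. A self-contained walk-through of that merge on a made-up schema fragment; the `Pet` schema and property names are illustrative only:

```python
# Made-up OpenAPI fragment: one body property declared via allOf with a $ref.
openapi = {
    "components": {
        "schemas": {
            "Pet": {
                "type": "object",
                "required": ["name"],
                "properties": {"name": {"type": "string"}},
            }
        }
    }
}
root = {
    "properties": {
        "pet": {
            "allOf": [
                {"$ref": "#/components/schemas/Pet"},
                {"description": "the pet to create"},
            ]
        }
    }
}

for prop_dict in root.get("properties", {}).values():
    for item in prop_dict.get("allOf", []):
        if "$ref" in item:
            # walk "components" -> "schemas" -> "Pet" inside the document
            ref_schema = openapi
            for ref in item["$ref"].split("/")[1:]:
                ref_schema = ref_schema[ref]
        else:
            ref_schema = item
        for key, value in ref_schema.items():
            if isinstance(value, list):
                # list fields such as `required` are concatenated
                if key not in prop_dict:
                    prop_dict[key] = []
                if isinstance(prop_dict[key], list):
                    prop_dict[key].extend(value)
            elif key not in prop_dict:
                # scalar fields: the property's own definition wins
                prop_dict[key] = value
    prop_dict.pop("allOf", None)

print(root["properties"]["pet"])
# {'type': 'object', 'required': ['name'],
#  'properties': {'name': {'type': 'string'}}, 'description': 'the pet to create'}
```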
"schema" in interface["operation"]["requestBody"]["content"][content_type]: body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"] diff --git a/api/core/tools/utils/web_reader_tool.py b/api/core/tools/utils/web_reader_tool.py index cbd06fc186..df052c16db 100644 --- a/api/core/tools/utils/web_reader_tool.py +++ b/api/core/tools/utils/web_reader_tool.py @@ -55,7 +55,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: main_content_type = mimetypes.guess_type(filename)[0] if main_content_type not in supported_content_types: - return "Unsupported content-type [{}] of URL.".format(main_content_type) + return f"Unsupported content-type [{main_content_type}] of URL." if main_content_type in extract_processor.SUPPORT_URL_CONTENT_TYPES: return cast(str, ExtractProcessor.load_from_url(url, return_text=True)) @@ -67,7 +67,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: response = scraper.get(url, headers=headers, follow_redirects=True, timeout=(120, 300)) # type: ignore if response.status_code != 200: - return "URL returned status code {}.".format(response.status_code) + return f"URL returned status code {response.status_code}." # Detect encoding using chardet detected_encoding = chardet.detect(response.content) diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 8b89c2a7a9..962b9f7a81 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -194,7 +194,7 @@ class WorkflowTool(Tool): files.append(file_dict) except Exception: - logger.exception(f"Failed to transform file {file}") + logger.exception("Failed to transform file %s", file) else: parameters_result[parameter.name] = tool_parameters.get(parameter.name) diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index b315129763..ef13277e0c 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -238,13 +238,13 @@ class GraphEngine: while True: # max steps reached if self.graph_runtime_state.node_run_steps > self.max_execution_steps: - raise GraphRunFailedError("Max steps {} reached.".format(self.max_execution_steps)) + raise GraphRunFailedError(f"Max steps {self.max_execution_steps} reached.") # or max execution time reached if self._is_timed_out( start_at=self.graph_runtime_state.start_at, max_execution_time=self.max_execution_time ): - raise GraphRunFailedError("Max execution time {}s reached.".format(self.max_execution_time)) + raise GraphRunFailedError(f"Max execution time {self.max_execution_time}s reached.") # init route node state route_node_state = self.graph_runtime_state.node_run_state.create_node_state(node_id=next_node_id) @@ -377,7 +377,7 @@ class GraphEngine: edge = cast(GraphEdge, sub_edge_mappings[0]) if edge.run_condition is None: - logger.warning(f"Edge {edge.target_node_id} run condition is None") + logger.warning("Edge %s run condition is None", edge.target_node_id) continue result = ConditionManager.get_condition_handler( @@ -848,7 +848,7 @@ class GraphEngine: ) return except Exception as e: - logger.exception(f"Node {node.title} run failed") + logger.exception("Node %s run failed", node.title) raise e def _append_variables_recursively(self, node_id: str, variable_key_list: list[str], variable_value: VariableValue): diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py index c83303034e..2b6382a8a6 100644 
--- a/api/core/workflow/nodes/agent/agent_node.py +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -50,6 +50,7 @@ from .exc import ( AgentInputTypeError, AgentInvocationError, AgentMessageTransformError, + AgentNodeError, AgentVariableNotFoundError, AgentVariableTypeError, ToolFileNotFoundError, @@ -593,7 +594,14 @@ class AgentNode(BaseNode): variables[variable_name] = variable_value elif message.type == ToolInvokeMessage.MessageType.FILE: assert message.meta is not None - assert isinstance(message.meta, File) + assert isinstance(message.meta, dict) + # Validate that meta contains a 'file' key + if "file" not in message.meta: + raise AgentNodeError("File message is missing 'file' key in meta") + + # Validate that the file is an instance of File + if not isinstance(message.meta["file"], File): + raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}") files.append(message.meta["file"]) elif message.type == ToolInvokeMessage.MessageType.LOG: assert isinstance(message.message, ToolInvokeMessage.LogMessage) diff --git a/api/core/workflow/nodes/answer/base_stream_processor.py b/api/core/workflow/nodes/answer/base_stream_processor.py index 09d5464d7a..7e84557a2d 100644 --- a/api/core/workflow/nodes/answer/base_stream_processor.py +++ b/api/core/workflow/nodes/answer/base_stream_processor.py @@ -36,7 +36,7 @@ class StreamProcessor(ABC): reachable_node_ids: list[str] = [] unreachable_first_node_ids: list[str] = [] if finished_node_id not in self.graph.edge_mapping: - logger.warning(f"node {finished_node_id} has no edge mapping") + logger.warning("node %s has no edge mapping", finished_node_id) return for edge in self.graph.edge_mapping[finished_node_id]: if ( diff --git a/api/core/workflow/nodes/base/node.py b/api/core/workflow/nodes/base/node.py index fb5ec55453..be4f79af19 100644 --- a/api/core/workflow/nodes/base/node.py +++ b/api/core/workflow/nodes/base/node.py @@ -65,7 +65,7 @@ class BaseNode: try: result = self._run() except Exception as e: - logger.exception(f"Node {self.node_id} failed to run") + logger.exception("Node %s failed to run", self.node_id) result = NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, error=str(e), diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index ab5964ebd4..f3061f7d96 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -305,7 +305,7 @@ def _extract_text_from_doc(file_content: bytes) -> str: raise TextExtractionError(f"Failed to extract text from DOC: {str(e)}") from e -def paser_docx_part(block, doc: Document, content_items, i): +def parser_docx_part(block, doc: Document, content_items, i): if isinstance(block, CT_P): content_items.append((i, "paragraph", Paragraph(block, doc))) elif isinstance(block, CT_Tbl): @@ -329,7 +329,7 @@ def _extract_text_from_docx(file_content: bytes) -> str: part = next(it, None) i = 0 while part is not None: - paser_docx_part(part, doc, content_items, i) + parser_docx_part(part, doc, content_items, i) i = i + 1 part = next(it, None) @@ -363,7 +363,7 @@ def _extract_text_from_docx(file_content: bytes) -> str: text.append(markdown_table) except Exception as e: - logger.warning(f"Failed to extract table from DOC: {e}") + logger.warning("Failed to extract table from DOC: %s", e) continue return "\n".join(text) diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 6799d5c63c..bc1d5c9b87 100644 
--- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -129,7 +129,7 @@ class HttpRequestNode(BaseNode): }, ) except HttpRequestNodeError as e: - logger.warning(f"http request node {self.node_id} failed to run: {e}") + logger.warning("http request node %s failed to run: %s", self.node_id, e) return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, error=str(e), diff --git a/api/core/workflow/nodes/if_else/if_else_node.py b/api/core/workflow/nodes/if_else/if_else_node.py index 86e703dc68..2c83ea3d4f 100644 --- a/api/core/workflow/nodes/if_else/if_else_node.py +++ b/api/core/workflow/nodes/if_else/if_else_node.py @@ -129,7 +129,7 @@ class IfElseNode(BaseNode): var_mapping: dict[str, list[str]] = {} for case in typed_node_data.cases or []: for condition in case.conditions: - key = "{}.#{}#".format(node_id, ".".join(condition.variable_selector)) + key = f"{node_id}.#{'.'.join(condition.variable_selector)}#" var_mapping[key] = condition.variable_selector return var_mapping diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 5842c8d64b..def1e1cfa3 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -616,7 +616,7 @@ class IterationNode(BaseNode): ) except IterationNodeError as e: - logger.warning(f"Iteration run failed:{str(e)}") + logger.warning("Iteration run failed:%s", str(e)) yield IterationRunFailedEvent( iteration_id=self.id, iteration_node_id=self.node_id, diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 34b0afc75d..e041e217ca 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -453,35 +453,34 @@ class KnowledgeRetrievalNode(BaseNode): elif node_data.metadata_filtering_mode == "manual": if node_data.metadata_filtering_conditions: conditions = [] - if node_data.metadata_filtering_conditions: - for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore - metadata_name = condition.name - expected_value = condition.value - if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): - if isinstance(expected_value, str): - expected_value = self.graph_runtime_state.variable_pool.convert_template( - expected_value - ).value[0] - if expected_value.value_type in {"number", "integer", "float"}: # type: ignore - expected_value = expected_value.value # type: ignore - elif expected_value.value_type == "string": # type: ignore - expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore - else: - raise ValueError("Invalid expected metadata value type") - conditions.append( - Condition( - name=metadata_name, - comparison_operator=condition.comparison_operator, - value=expected_value, - ) - ) - filters = self._process_metadata_filter_func( - sequence, - condition.comparison_operator, - metadata_name, - expected_value, - filters, + for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore + metadata_name = condition.name + expected_value = condition.value + if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): + if isinstance(expected_value, str): + expected_value = 
self.graph_runtime_state.variable_pool.convert_template( + expected_value + ).value[0] + if expected_value.value_type in {"number", "integer", "float"}: # type: ignore + expected_value = expected_value.value # type: ignore + elif expected_value.value_type == "string": # type: ignore + expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore + else: + raise ValueError("Invalid expected metadata value type") + conditions.append( + Condition( + name=metadata_name, + comparison_operator=condition.comparison_operator, + value=expected_value, ) + ) + filters = self._process_metadata_filter_func( + sequence, + condition.comparison_operator, + metadata_name, + expected_value, + filters, + ) metadata_condition = MetadataCondition( logical_operator=node_data.metadata_filtering_conditions.logical_operator, conditions=conditions, diff --git a/api/core/workflow/nodes/list_operator/node.py b/api/core/workflow/nodes/list_operator/node.py index ae9401b056..d2e022dc9d 100644 --- a/api/core/workflow/nodes/list_operator/node.py +++ b/api/core/workflow/nodes/list_operator/node.py @@ -184,11 +184,10 @@ class ListOperatorNode(BaseNode): value = int(self.graph_runtime_state.variable_pool.convert_template(self._node_data.extract_by.serial).text) if value < 1: raise ValueError(f"Invalid serial index: must be >= 1, got {value}") + if value > len(variable.value): + raise InvalidKeyError(f"Invalid serial index: must be <= {len(variable.value)}, got {value}") value -= 1 - if len(variable.value) > int(value): - result = variable.value[value] - else: - result = "" + result = variable.value[value] return variable.model_copy(update={"value": [result]}) @@ -300,7 +299,7 @@ def _endswith(value: str) -> Callable[[str], bool]: def _is(value: str) -> Callable[[str], bool]: - return lambda x: x is value + return lambda x: x == value def _in(value: str | Sequence[str]) -> Callable[[str], bool]: diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index a23d284626..45c5e0a62c 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -670,7 +670,7 @@ class ParameterExtractorNode(BaseNode): return cast(dict, json.loads(json_str)) except Exception: pass - logger.info(f"extra error: {result}") + logger.info("extra error: %s", result) return None def _extract_json_from_tool_call(self, tool_call: AssistantPromptMessage.ToolCall) -> Optional[dict]: @@ -690,7 +690,7 @@ class ParameterExtractorNode(BaseNode): return cast(dict, json.loads(json_str)) except Exception: pass - logger.info(f"extra error: {result}") + logger.info("extra error: %s", result) return None def _generate_default_result(self, data: ParameterExtractorNodeData) -> dict: diff --git a/api/core/workflow/nodes/question_classifier/question_classifier_node.py b/api/core/workflow/nodes/question_classifier/question_classifier_node.py index 15012fa48d..3e4984ecd5 100644 --- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py +++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py @@ -385,9 +385,8 @@ class QuestionClassifierNode(BaseNode): text=QUESTION_CLASSIFIER_COMPLETION_PROMPT.format( histories=memory_str, input_text=input_text, - categories=json.dumps(categories), + categories=json.dumps(categories, ensure_ascii=False), classification_instructions=instruction, - ensure_ascii=False, ) ) diff 
--git a/api/core/workflow/nodes/tool/entities.py b/api/core/workflow/nodes/tool/entities.py index f0a44d919b..4f47fb1efc 100644 --- a/api/core/workflow/nodes/tool/entities.py +++ b/api/core/workflow/nodes/tool/entities.py @@ -54,7 +54,7 @@ class ToolNodeData(BaseNodeData, ToolEntity): for val in value: if not isinstance(val, str): raise ValueError("value must be a list of strings") - elif typ == "constant" and not isinstance(value, str | int | float | bool): + elif typ == "constant" and not isinstance(value, str | int | float | bool | dict): raise ValueError("value must be a string, int, float, or bool") return typ diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index f437ac841d..4c8e13de70 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import Session from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler from core.file import File, FileTransferMethod -from core.plugin.impl.exc import PluginDaemonClientSideError +from core.plugin.impl.exc import PluginDaemonClientSideError, PluginInvokeError from core.plugin.impl.plugin import PluginInstaller from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.errors import ToolInvokeError @@ -141,13 +141,36 @@ class ToolNode(BaseNode): tenant_id=self.tenant_id, node_id=self.node_id, ) - except (PluginDaemonClientSideError, ToolInvokeError) as e: + except ToolInvokeError as e: yield RunCompletedEvent( run_result=NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, inputs=parameters_for_log, metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, - error=f"Failed to transform tool message: {str(e)}", + error=f"Failed to invoke tool {node_data.provider_name}: {str(e)}", + error_type=type(e).__name__, + ) + ) + except PluginInvokeError as e: + yield RunCompletedEvent( + run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, + error="An error occurred in the plugin, " + f"please contact the author of {node_data.provider_name} for help, " + f"error type: {e.get_error_type()}, " + f"error details: {e.get_error_message()}", + error_type=type(e).__name__, + ) + ) + except PluginDaemonClientSideError as e: + yield RunCompletedEvent( + run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, + error=f"Failed to invoke tool, error: {e.description}", error_type=type(e).__name__, ) ) diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index c8082ebf50..801e36e272 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -67,7 +67,7 @@ class WorkflowEntry: # check call depth workflow_call_max_depth = dify_config.WORKFLOW_CALL_MAX_DEPTH if call_depth > workflow_call_max_depth: - raise ValueError("Max workflow call depth {} reached.".format(workflow_call_max_depth)) + raise ValueError(f"Max workflow call depth {workflow_call_max_depth} reached.") # init workflow run state graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter()) @@ -193,7 +193,13 @@ class WorkflowEntry: # run node generator = node.run() except Exception as e: - logger.exception(f"error while running node, {workflow.id=}, {node.id=}, 
{node.type_=}, {node.version()=}") + logger.exception( + "error while running node, workflow_id=%s, node_id=%s, node_type=%s, node_version=%s", + workflow.id, + node.id, + node.type_, + node.version(), + ) raise WorkflowNodeRunFailedError(node=node, err_msg=str(e)) return node, generator @@ -297,7 +303,12 @@ class WorkflowEntry: return node, generator except Exception as e: - logger.exception(f"error while running node, {node.id=}, {node.type_=}, {node.version()=}") + logger.exception( + "error while running node, node_id=%s, node_type=%s, node_version=%s", + node.id, + node.type_, + node.version(), + ) raise WorkflowNodeRunFailedError(node=node, err_msg=str(e)) @staticmethod diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 4de9a25c2f..a850ea9a50 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -2,6 +2,11 @@ set -e +# Set UTF-8 encoding to address potential encoding issues in containerized environments +export LANG=${LANG:-en_US.UTF-8} +export LC_ALL=${LC_ALL:-en_US.UTF-8} +export PYTHONIOENCODING=${PYTHONIOENCODING:-utf-8} + if [[ "${MIGRATION_ENABLED}" == "true" ]]; then echo "Running migrations" flask upgrade-db diff --git a/api/events/event_handlers/create_document_index.py b/api/events/event_handlers/create_document_index.py index dc50ca8d96..bdb69945f0 100644 --- a/api/events/event_handlers/create_document_index.py +++ b/api/events/event_handlers/create_document_index.py @@ -18,7 +18,7 @@ def handle(sender, **kwargs): documents = [] start_at = time.perf_counter() for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document) @@ -42,7 +42,7 @@ def handle(sender, **kwargs): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: diff --git a/api/events/event_handlers/update_provider_when_message_created.py b/api/events/event_handlers/update_provider_when_message_created.py index d3943f2eda..2ed42c71ea 100644 --- a/api/events/event_handlers/update_provider_when_message_created.py +++ b/api/events/event_handlers/update_provider_when_message_created.py @@ -131,9 +131,11 @@ def handle(sender: Message, **kwargs): duration = time_module.perf_counter() - start_time logger.info( - f"Provider updates completed successfully. " - f"Updates: {len(updates_to_perform)}, Duration: {duration:.3f}s, " - f"Tenant: {tenant_id}, Provider: {provider_name}" + "Provider updates completed successfully. Updates: %s, Duration: %s s, Tenant: %s, Provider: %s", + len(updates_to_perform), + duration, + tenant_id, + provider_name, ) except Exception as e: @@ -141,9 +143,11 @@ def handle(sender: Message, **kwargs): duration = time_module.perf_counter() - start_time logger.exception( - f"Provider updates failed after {duration:.3f}s. " - f"Updates: {len(updates_to_perform)}, Tenant: {tenant_id}, " - f"Provider: {provider_name}" + "Provider updates failed after %s s. 
Updates: %s, Tenant: %s, Provider: %s", + duration, + len(updates_to_perform), + tenant_id, + provider_name, ) raise @@ -219,16 +223,20 @@ def _execute_provider_updates(updates_to_perform: list[_ProviderUpdateOperation] rows_affected = result.rowcount logger.debug( - f"Provider update ({description}): {rows_affected} rows affected. " - f"Filters: {filters.model_dump()}, Values: {update_values}" + "Provider update (%s): %s rows affected. Filters: %s, Values: %s", + description, + rows_affected, + filters.model_dump(), + update_values, ) # If no rows were affected for quota updates, log a warning if rows_affected == 0 and description == "quota_deduction_update": logger.warning( - f"No Provider rows updated for quota deduction. " - f"This may indicate quota limit exceeded or provider not found. " - f"Filters: {filters.model_dump()}" + "No Provider rows updated for quota deduction. " + "This may indicate quota limit exceeded or provider not found. " + "Filters: %s", + filters.model_dump(), ) - logger.debug(f"Successfully processed {len(updates_to_perform)} Provider updates") + logger.debug("Successfully processed %s Provider updates", len(updates_to_perform)) diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 2c2846ba26..bd72c93404 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -73,13 +73,13 @@ def init_app(app: DifyApp) -> Celery: imports.append("schedule.clean_embedding_cache_task") beat_schedule["clean_embedding_cache_task"] = { "task": "schedule.clean_embedding_cache_task.clean_embedding_cache_task", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="2", day_of_month=f"*/{day}"), } if dify_config.ENABLE_CLEAN_UNUSED_DATASETS_TASK: imports.append("schedule.clean_unused_datasets_task") beat_schedule["clean_unused_datasets_task"] = { "task": "schedule.clean_unused_datasets_task.clean_unused_datasets_task", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="3", day_of_month=f"*/{day}"), } if dify_config.ENABLE_CREATE_TIDB_SERVERLESS_TASK: imports.append("schedule.create_tidb_serverless_task") @@ -97,7 +97,7 @@ def init_app(app: DifyApp) -> Celery: imports.append("schedule.clean_messages") beat_schedule["clean_messages"] = { "task": "schedule.clean_messages.clean_messages", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="4", day_of_month=f"*/{day}"), } if dify_config.ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: imports.append("schedule.mail_clean_document_notify_task") diff --git a/api/extensions/ext_mail.py b/api/extensions/ext_mail.py index df5d8a9c11..fe05138196 100644 --- a/api/extensions/ext_mail.py +++ b/api/extensions/ext_mail.py @@ -64,7 +64,7 @@ class Mail: sendgrid_api_key=dify_config.SENDGRID_API_KEY, _from=dify_config.MAIL_DEFAULT_SEND_FROM or "" ) case _: - raise ValueError("Unsupported mail type {}".format(mail_type)) + raise ValueError(f"Unsupported mail type {mail_type}") def send(self, to: str, subject: str, html: str, from_: Optional[str] = None): if not self._client: diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index be2f6115f7..14b9273e9d 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ -137,7 +137,7 @@ def redis_fallback(default_return: Any = None): try: return func(*args, **kwargs) except RedisError as e: - logger.warning(f"Redis operation failed in {func.__name__}: {str(e)}", exc_info=True) + logger.warning("Redis operation failed in %s: %s", func.__name__, str(e), exc_info=True) return 
default_return return wrapper diff --git a/api/extensions/storage/azure_blob_storage.py b/api/extensions/storage/azure_blob_storage.py index 81eec94da4..7ec0889776 100644 --- a/api/extensions/storage/azure_blob_storage.py +++ b/api/extensions/storage/azure_blob_storage.py @@ -69,7 +69,7 @@ class AzureBlobStorage(BaseStorage): if self.account_key == "managedidentity": return BlobServiceClient(account_url=self.account_url, credential=self.credential) # type: ignore - cache_key = "azure_blob_sas_token_{}_{}".format(self.account_name, self.account_key) + cache_key = f"azure_blob_sas_token_{self.account_name}_{self.account_key}" cache_result = redis_client.get(cache_key) if cache_result is not None: sas_token = cache_result.decode("utf-8") diff --git a/api/extensions/storage/opendal_storage.py b/api/extensions/storage/opendal_storage.py index 12e2738e9d..0ba35506d3 100644 --- a/api/extensions/storage/opendal_storage.py +++ b/api/extensions/storage/opendal_storage.py @@ -35,21 +35,21 @@ class OpenDALStorage(BaseStorage): Path(root).mkdir(parents=True, exist_ok=True) self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore - logger.debug(f"opendal operator created with scheme {scheme}") + logger.debug("opendal operator created with scheme %s", scheme) retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True) self.op = self.op.layer(retry_layer) logger.debug("added retry layer to opendal operator") def save(self, filename: str, data: bytes) -> None: self.op.write(path=filename, bs=data) - logger.debug(f"file {filename} saved") + logger.debug("file %s saved", filename) def load_once(self, filename: str) -> bytes: if not self.exists(filename): raise FileNotFoundError("File not found") content: bytes = self.op.read(path=filename) - logger.debug(f"file {filename} loaded") + logger.debug("file %s loaded", filename) return content def load_stream(self, filename: str) -> Generator: @@ -60,7 +60,7 @@ class OpenDALStorage(BaseStorage): file = self.op.open(path=filename, mode="rb") while chunk := file.read(batch_size): yield chunk - logger.debug(f"file {filename} loaded as stream") + logger.debug("file %s loaded as stream", filename) def download(self, filename: str, target_filepath: str): if not self.exists(filename): @@ -68,7 +68,7 @@ class OpenDALStorage(BaseStorage): with Path(target_filepath).open("wb") as f: f.write(self.op.read(path=filename)) - logger.debug(f"file {filename} downloaded to {target_filepath}") + logger.debug("file %s downloaded to %s", filename, target_filepath) def exists(self, filename: str) -> bool: res: bool = self.op.exists(path=filename) @@ -77,9 +77,9 @@ class OpenDALStorage(BaseStorage): def delete(self, filename: str): if self.exists(filename): self.op.delete(path=filename) - logger.debug(f"file {filename} deleted") + logger.debug("file %s deleted", filename) return - logger.debug(f"file {filename} not found, skip delete") + logger.debug("file %s not found, skip delete", filename) def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: if not self.exists(path): @@ -87,13 +87,13 @@ class OpenDALStorage(BaseStorage): all_files = self.op.scan(path=path) if files and directories: - logger.debug(f"files and directories on {path} scanned") + logger.debug("files and directories on %s scanned", path) return [f.path for f in all_files] if files: - logger.debug(f"files on {path} scanned") + logger.debug("files on %s scanned", path) return [f.path for f in all_files if not f.path.endswith("/")] elif directories: - 
logger.debug(f"directories on {path} scanned") + logger.debug("directories on %s scanned", path) return [f.path for f in all_files if f.path.endswith("/")] else: raise ValueError("At least one of files or directories must be True") diff --git a/api/extensions/storage/volcengine_tos_storage.py b/api/extensions/storage/volcengine_tos_storage.py index 55fe6545ec..32839d3497 100644 --- a/api/extensions/storage/volcengine_tos_storage.py +++ b/api/extensions/storage/volcengine_tos_storage.py @@ -25,7 +25,7 @@ class VolcengineTosStorage(BaseStorage): def load_once(self, filename: str) -> bytes: data = self.client.get_object(bucket=self.bucket_name, key=filename).read() if not isinstance(data, bytes): - raise TypeError("Expected bytes, got {}".format(type(data).__name__)) + raise TypeError(f"Expected bytes, got {type(data).__name__}") return data def load_stream(self, filename: str) -> Generator: diff --git a/api/libs/email_i18n.py b/api/libs/email_i18n.py index bfbf41a073..b7c9f3ec6c 100644 --- a/api/libs/email_i18n.py +++ b/api/libs/email_i18n.py @@ -25,6 +25,7 @@ class EmailType(Enum): EMAIL_CODE_LOGIN = "email_code_login" CHANGE_EMAIL_OLD = "change_email_old" CHANGE_EMAIL_NEW = "change_email_new" + CHANGE_EMAIL_COMPLETED = "change_email_completed" OWNER_TRANSFER_CONFIRM = "owner_transfer_confirm" OWNER_TRANSFER_OLD_NOTIFY = "owner_transfer_old_notify" OWNER_TRANSFER_NEW_NOTIFY = "owner_transfer_new_notify" @@ -344,6 +345,18 @@ def create_default_email_config() -> EmailI18nConfig: branded_template_path="without-brand/change_mail_confirm_new_template_zh-CN.html", ), }, + EmailType.CHANGE_EMAIL_COMPLETED: { + EmailLanguage.EN_US: EmailTemplate( + subject="Your login email has been changed", + template_path="change_mail_completed_template_en-US.html", + branded_template_path="without-brand/change_mail_completed_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的登录邮箱已更改", + template_path="change_mail_completed_template_zh-CN.html", + branded_template_path="without-brand/change_mail_completed_template_zh-CN.html", + ), + }, EmailType.OWNER_TRANSFER_CONFIRM: { EmailLanguage.EN_US: EmailTemplate( subject="Verify Your Request to Transfer Workspace Ownership", diff --git a/api/libs/helper.py b/api/libs/helper.py index 00772d530a..b36f972e19 100644 --- a/api/libs/helper.py +++ b/api/libs/helper.py @@ -95,7 +95,7 @@ def email(email): if re.match(pattern, email) is not None: return email - error = "{email} is not a valid email.".format(email=email) + error = f"{email} is not a valid email." raise ValueError(error) @@ -107,7 +107,7 @@ def uuid_value(value): uuid_obj = uuid.UUID(value) return str(uuid_obj) except ValueError: - error = "{value} is not a valid uuid.".format(value=value) + error = f"{value} is not a valid uuid." raise ValueError(error) @@ -126,7 +126,7 @@ def timestamp_value(timestamp): raise ValueError return int_timestamp except ValueError: - error = "{timestamp} is not a valid timestamp.".format(timestamp=timestamp) + error = f"{timestamp} is not a valid timestamp." 
raise ValueError(error) @@ -169,14 +169,14 @@ def _get_float(value): try: return float(value) except (TypeError, ValueError): - raise ValueError("{} is not a valid float".format(value)) + raise ValueError(f"{value} is not a valid float") def timezone(timezone_string): if timezone_string and timezone_string in available_timezones(): return timezone_string - error = "{timezone_string} is not a valid timezone.".format(timezone_string=timezone_string) + error = f"{timezone_string} is not a valid timezone." raise ValueError(error) @@ -321,7 +321,7 @@ class TokenManager: key = cls._get_token_key(token, token_type) token_data_json = redis_client.get(key) if token_data_json is None: - logging.warning(f"{token_type} token {token} not found with key {key}") + logging.warning("%s token %s not found with key %s", token_type, token, key) return None token_data: Optional[dict[str, Any]] = json.loads(token_data_json) return token_data diff --git a/api/libs/rsa.py b/api/libs/rsa.py index ed7a0eb116..598e5bc9e3 100644 --- a/api/libs/rsa.py +++ b/api/libs/rsa.py @@ -50,13 +50,13 @@ def encrypt(text: str, public_key: Union[str, bytes]) -> bytes: def get_decrypt_decoding(tenant_id: str) -> tuple[RSA.RsaKey, object]: filepath = os.path.join("privkeys", tenant_id, "private.pem") - cache_key = "tenant_privkey:{hash}".format(hash=hashlib.sha3_256(filepath.encode()).hexdigest()) + cache_key = f"tenant_privkey:{hashlib.sha3_256(filepath.encode()).hexdigest()}" private_key = redis_client.get(cache_key) if not private_key: try: private_key = storage.load(filepath) except FileNotFoundError: - raise PrivkeyNotFoundError("Private key not found, tenant_id: {tenant_id}".format(tenant_id=tenant_id)) + raise PrivkeyNotFoundError(f"Private key not found, tenant_id: {tenant_id}") redis_client.setex(cache_key, 120, private_key) diff --git a/api/libs/sendgrid.py b/api/libs/sendgrid.py index 5409e3eeeb..cfc6c7d794 100644 --- a/api/libs/sendgrid.py +++ b/api/libs/sendgrid.py @@ -41,5 +41,5 @@ class SendGridClient: ) raise except Exception as e: - logging.exception(f"SendGridClient Unexpected error occurred while sending email to {_to}") + logging.exception("SendGridClient Unexpected error occurred while sending email to %s", _to) raise diff --git a/api/libs/smtp.py b/api/libs/smtp.py index b94386660e..a01ad6fab8 100644 --- a/api/libs/smtp.py +++ b/api/libs/smtp.py @@ -50,7 +50,7 @@ class SMTPClient: logging.exception("Timeout occurred while sending email") raise except Exception as e: - logging.exception(f"Unexpected error occurred while sending email to {mail['to']}") + logging.exception("Unexpected error occurred while sending email to %s", mail["to"]) raise finally: if smtp: diff --git a/api/models/dataset.py b/api/models/dataset.py index d877540213..01372f8bf6 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -911,7 +911,7 @@ class DatasetKeywordTable(Base): return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder) return None except Exception as e: - logging.exception(f"Failed to load keyword table from file: {file_key}") + logging.exception("Failed to load keyword table from file: %s", file_key) return None diff --git a/api/models/model.py b/api/models/model.py index a78a91ebd5..9f6d51b315 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -32,9 +32,6 @@ from .engine import db from .enums import CreatorUserRole from .types import StringUUID -if TYPE_CHECKING: - from .workflow import Workflow - class DifySetup(Base): __tablename__ = "dify_setups" diff --git a/api/models/workflow.py 
b/api/models/workflow.py index 79d96e42dd..d89db6c7da 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -42,9 +42,6 @@ from .types import EnumText, StringUUID _logger = logging.getLogger(__name__) -if TYPE_CHECKING: - from models.model import AppMode - class WorkflowType(Enum): """ diff --git a/api/repositories/factory.py b/api/repositories/factory.py index 0a0adbf2c2..070cdd46dd 100644 --- a/api/repositories/factory.py +++ b/api/repositories/factory.py @@ -48,7 +48,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): RepositoryImportError: If the configured repository cannot be imported or instantiated """ class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY - logger.debug(f"Creating DifyAPIWorkflowNodeExecutionRepository from: {class_path}") + logger.debug("Creating DifyAPIWorkflowNodeExecutionRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) @@ -86,7 +86,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): RepositoryImportError: If the configured repository cannot be imported or instantiated """ class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY - logger.debug(f"Creating APIWorkflowRunRepository from: {class_path}") + logger.debug("Creating APIWorkflowRunRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) diff --git a/api/repositories/sqlalchemy_api_workflow_run_repository.py b/api/repositories/sqlalchemy_api_workflow_run_repository.py index ebd1d74b20..7c3b1f4ce0 100644 --- a/api/repositories/sqlalchemy_api_workflow_run_repository.py +++ b/api/repositories/sqlalchemy_api_workflow_run_repository.py @@ -155,7 +155,7 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): session.commit() deleted_count = cast(int, result.rowcount) - logger.info(f"Deleted {deleted_count} workflow runs by IDs") + logger.info("Deleted %s workflow runs by IDs", deleted_count) return deleted_count def delete_runs_by_app( @@ -193,11 +193,11 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): batch_deleted = result.rowcount total_deleted += batch_deleted - logger.info(f"Deleted batch of {batch_deleted} workflow runs for app {app_id}") + logger.info("Deleted batch of %s workflow runs for app %s", batch_deleted, app_id) # If we deleted fewer records than the batch size, we're done if batch_deleted < batch_size: break - logger.info(f"Total deleted {total_deleted} workflow runs for app {app_id}") + logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id) return total_deleted diff --git a/api/schedule/check_upgradable_plugin_task.py b/api/schedule/check_upgradable_plugin_task.py index c1d6018827..e27391b558 100644 --- a/api/schedule/check_upgradable_plugin_task.py +++ b/api/schedule/check_upgradable_plugin_task.py @@ -16,7 +16,7 @@ def check_upgradable_plugin_task(): start_at = time.perf_counter() now_seconds_of_day = time.time() % 86400 - 30 # we assume the tz is UTC - click.echo(click.style("Now seconds of day: {}".format(now_seconds_of_day), fg="green")) + click.echo(click.style(f"Now seconds of day: {now_seconds_of_day}", fg="green")) strategies = ( db.session.query(TenantPluginAutoUpgradeStrategy) @@ -43,7 +43,7 @@ def check_upgradable_plugin_task(): end_at = time.perf_counter() click.echo( click.style( - "Checked upgradable plugin success latency: {}".format(end_at - start_at), + f"Checked upgradable plugin success latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/schedule/clean_embedding_cache_task.py 
b/api/schedule/clean_embedding_cache_task.py index 024e3d6f50..2298acf6eb 100644 --- a/api/schedule/clean_embedding_cache_task.py +++ b/api/schedule/clean_embedding_cache_task.py @@ -39,4 +39,4 @@ def clean_embedding_cache_task(): else: break end_at = time.perf_counter() - click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned embedding cache from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index a6851e36e5..4c35745959 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -87,4 +87,4 @@ def clean_messages(): db.session.query(Message).where(Message.id == message.id).delete() db.session.commit() end_at = time.perf_counter() - click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned messages from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py index 72e2e73e65..7887835bc5 100644 --- a/api/schedule/clean_unused_datasets_task.py +++ b/api/schedule/clean_unused_datasets_task.py @@ -101,11 +101,9 @@ def clean_unused_datasets_task(): # update document db.session.query(Document).filter_by(dataset_id=dataset.id).update({Document.enabled: False}) db.session.commit() - click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")) + click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green")) except Exception as e: - click.echo( - click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red")) while True: try: # Subquery for counting new documents @@ -176,12 +174,8 @@ def clean_unused_datasets_task(): # update document db.session.query(Document).filter_by(dataset_id=dataset.id).update({Document.enabled: False}) db.session.commit() - click.echo( - click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green") - ) + click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green")) except Exception as e: - click.echo( - click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red")) end_at = time.perf_counter() - click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/create_tidb_serverless_task.py b/api/schedule/create_tidb_serverless_task.py index 91953354e6..c343063fae 100644 --- a/api/schedule/create_tidb_serverless_task.py +++ b/api/schedule/create_tidb_serverless_task.py @@ -33,7 +33,7 @@ def create_tidb_serverless_task(): break end_at = time.perf_counter() - click.echo(click.style("Create tidb serverless task success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Create tidb serverless task success latency: {end_at - start_at}", fg="green")) def create_clusters(batch_size): diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py 
index 5911c98b0a..03ef9062bd 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -90,7 +90,7 @@ def mail_clean_document_notify_task(): db.session.commit() end_at = time.perf_counter() logging.info( - click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green") + click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: logging.exception("Send document clean notify mail failed") diff --git a/api/schedule/update_tidb_serverless_status_task.py b/api/schedule/update_tidb_serverless_status_task.py index 4d6c1f1877..1bfeb869e2 100644 --- a/api/schedule/update_tidb_serverless_status_task.py +++ b/api/schedule/update_tidb_serverless_status_task.py @@ -29,9 +29,7 @@ def update_tidb_serverless_status_task(): click.echo(click.style(f"Error: {e}", fg="red")) end_at = time.perf_counter() - click.echo( - click.style("Update tidb serverless status task success latency: {}".format(end_at - start_at), fg="green") - ) + click.echo(click.style(f"Update tidb serverless status task success latency: {end_at - start_at}", fg="green")) def update_clusters(tidb_serverless_list: list[TidbAuthBinding]): diff --git a/api/services/account_service.py b/api/services/account_service.py index 59bffa873c..34b3ce0543 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -54,7 +54,10 @@ from services.errors.workspace import WorkSpaceNotAllowedCreateError, Workspaces from services.feature_service import FeatureService from tasks.delete_account_task import delete_account_task from tasks.mail_account_deletion_task import send_account_deletion_verification_code -from tasks.mail_change_mail_task import send_change_mail_task +from tasks.mail_change_mail_task import ( + send_change_mail_completed_notification_task, + send_change_mail_task, +) from tasks.mail_email_code_login import send_email_code_login_mail_task from tasks.mail_invite_member_task import send_invite_member_mail_task from tasks.mail_owner_transfer_task import ( @@ -329,9 +332,9 @@ class AccountService: db.session.add(account_integrate) db.session.commit() - logging.info(f"Account {account.id} linked {provider} account {open_id}.") + logging.info("Account %s linked %s account %s.", account.id, provider, open_id) except Exception as e: - logging.exception(f"Failed to link {provider} account {open_id} to Account {account.id}") + logging.exception("Failed to link %s account %s to Account %s", provider, open_id, account.id) raise LinkAccountIntegrateError("Failed to link account.") from e @staticmethod @@ -461,6 +464,22 @@ class AccountService: cls.change_email_rate_limiter.increment_rate_limit(account_email) return token + @classmethod + def send_change_email_completed_notify_email( + cls, + account: Optional[Account] = None, + email: Optional[str] = None, + language: Optional[str] = "en-US", + ): + account_email = account.email if account else email + if account_email is None: + raise ValueError("Email must be provided.") + + send_change_mail_completed_notification_task.delay( + language=language, + to=account_email, + ) + @classmethod def send_owner_transfer_email( cls, @@ -652,6 +671,12 @@ class AccountService: return account + @classmethod + def is_account_in_freeze(cls, email: str) -> bool: + if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(email): + return True + return False + @staticmethod @redis_fallback(default_return=None) def 
add_login_error_rate_limit(email: str) -> None: @@ -881,7 +906,7 @@ class TenantService: """Create tenant member""" if role == TenantAccountRole.OWNER.value: if TenantService.has_roles(tenant, [TenantAccountRole.OWNER]): - logging.error(f"Tenant {tenant.id} has already an owner.") + logging.error("Tenant %s has already an owner.", tenant.id) raise Exception("Tenant already has an owner.") ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() @@ -1133,7 +1158,7 @@ class RegisterService: db.session.query(Tenant).delete() db.session.commit() - logging.exception(f"Setup account failed, email: {email}, name: {name}") + logging.exception("Setup account failed, email: %s, name: %s", email, name) raise ValueError(f"Setup failed: {e}") @classmethod @@ -1257,7 +1282,7 @@ class RegisterService: def revoke_token(cls, workspace_id: str, email: str, token: str): if workspace_id and email: email_hash = sha256(email.encode()).hexdigest() - cache_key = "member_invite_token:{}, {}:{}".format(workspace_id, email_hash, token) + cache_key = f"member_invite_token:{workspace_id}, {email_hash}:{token}" redis_client.delete(cache_key) else: redis_client.delete(cls._get_invitation_token_key(token)) diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index 7cb0b46517..3239af998e 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -74,14 +74,14 @@ class AppAnnotationService: @classmethod def enable_app_annotation(cls, args: dict, app_id: str) -> dict: - enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id)) + enable_app_annotation_key = f"enable_app_annotation_{str(app_id)}" cache_result = redis_client.get(enable_app_annotation_key) if cache_result is not None: return {"job_id": cache_result, "job_status": "processing"} # async job job_id = str(uuid.uuid4()) - enable_app_annotation_job_key = "enable_app_annotation_job_{}".format(str(job_id)) + enable_app_annotation_job_key = f"enable_app_annotation_job_{str(job_id)}" # send batch add segments task redis_client.setnx(enable_app_annotation_job_key, "waiting") enable_annotation_reply_task.delay( @@ -97,14 +97,14 @@ class AppAnnotationService: @classmethod def disable_app_annotation(cls, app_id: str) -> dict: - disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id)) + disable_app_annotation_key = f"disable_app_annotation_{str(app_id)}" cache_result = redis_client.get(disable_app_annotation_key) if cache_result is not None: return {"job_id": cache_result, "job_status": "processing"} # async job job_id = str(uuid.uuid4()) - disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id)) + disable_app_annotation_job_key = f"disable_app_annotation_job_{str(job_id)}" # send batch add segments task redis_client.setnx(disable_app_annotation_job_key, "waiting") disable_annotation_reply_task.delay(str(job_id), app_id, current_user.current_tenant_id) @@ -127,8 +127,8 @@ class AppAnnotationService: .where(MessageAnnotation.app_id == app_id) .where( or_( - MessageAnnotation.question.ilike("%{}%".format(keyword)), - MessageAnnotation.content.ilike("%{}%".format(keyword)), + MessageAnnotation.question.ilike(f"%{keyword}%"), + MessageAnnotation.content.ilike(f"%{keyword}%"), ) ) .order_by(MessageAnnotation.created_at.desc(), MessageAnnotation.id.desc()) @@ -295,7 +295,7 @@ class AppAnnotationService: raise ValueError("The number of annotations exceeds the limit of your subscription.") # async job job_id = 
str(uuid.uuid4()) - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" # send batch add segments task redis_client.setnx(indexing_cache_key, "waiting") batch_import_annotations_task.delay( @@ -440,3 +440,27 @@ class AppAnnotationService: "embedding_model_name": collection_binding_detail.model_name, }, } + + @classmethod + def clear_all_annotations(cls, app_id: str) -> dict: + app = ( + db.session.query(App) + .filter(App.id == app_id, App.tenant_id == current_user.current_tenant_id, App.status == "normal") + .first() + ) + + if not app: + raise NotFound("App not found") + + annotations_query = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id) + for annotation in annotations_query.yield_per(100): + annotation_hit_histories_query = db.session.query(AppAnnotationHitHistory).filter( + AppAnnotationHitHistory.annotation_id == annotation.id + ) + for annotation_hit_history in annotation_hit_histories_query.yield_per(100): + db.session.delete(annotation_hit_history) + + db.session.delete(annotation) + + db.session.commit() + return {"result": "success"} diff --git a/api/services/api_based_extension_service.py b/api/services/api_based_extension_service.py index 457c91e5c0..2f28eff165 100644 --- a/api/services/api_based_extension_service.py +++ b/api/services/api_based_extension_service.py @@ -102,4 +102,4 @@ class APIBasedExtensionService: if resp.get("result") != "pong": raise ValueError(resp) except Exception as e: - raise ValueError("connection error: {}".format(e)) + raise ValueError(f"connection error: {e}") diff --git a/api/services/app_service.py b/api/services/app_service.py index 0b6b85bcb2..3557f13337 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -94,7 +94,7 @@ class AppService: except (ProviderTokenNotInitError, LLMBadRequestError): model_instance = None except Exception as e: - logging.exception(f"Get default model instance failed, tenant_id: {tenant_id}") + logging.exception("Get default model instance failed, tenant_id: %s", tenant_id) model_instance = None if model_instance: diff --git a/api/services/clear_free_plan_tenant_expired_logs.py b/api/services/clear_free_plan_tenant_expired_logs.py index ad9b750d40..d057a14afb 100644 --- a/api/services/clear_free_plan_tenant_expired_logs.py +++ b/api/services/clear_free_plan_tenant_expired_logs.py @@ -228,7 +228,7 @@ class ClearFreePlanTenantExpiredLogs: # only process sandbox tenant cls.process_tenant(flask_app, tenant_id, days, batch) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) finally: nonlocal handled_tenant_count handled_tenant_count += 1 @@ -311,7 +311,7 @@ class ClearFreePlanTenantExpiredLogs: try: tenants.append(tenant_id) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) continue futures.append( diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 4872702a76..209d153b0c 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -605,8 +605,9 @@ class DatasetService: except ProviderTokenNotInitError: # If we can't get the embedding model, preserve existing settings logging.warning( - f"Failed to initialize embedding model {data['embedding_model_provider']}/{data['embedding_model']}, " - f"preserving existing settings" + "Failed to initialize 
embedding model %s/%s, preserving existing settings", + data["embedding_model_provider"], + data["embedding_model"], ) if dataset.embedding_model_provider and dataset.embedding_model: filtered_data["embedding_model_provider"] = dataset.embedding_model_provider @@ -649,11 +650,11 @@ class DatasetService: @staticmethod def check_dataset_permission(dataset, user): if dataset.tenant_id != user.current_tenant_id: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") if user.current_role != TenantAccountRole.OWNER: if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") if dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: # For partial team permission, user needs explicit permission or be the creator @@ -662,7 +663,7 @@ class DatasetService: db.session.query(DatasetPermission).filter_by(dataset_id=dataset.id, account_id=user.id).first() ) if not user_permission: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod @@ -1000,7 +1001,7 @@ class DocumentService: db.session.add(document) db.session.commit() # set document paused flag - indexing_cache_key = "document_{}_is_paused".format(document.id) + indexing_cache_key = f"document_{document.id}_is_paused" redis_client.setnx(indexing_cache_key, "True") @staticmethod @@ -1015,7 +1016,7 @@ class DocumentService: db.session.add(document) db.session.commit() # delete paused flag - indexing_cache_key = "document_{}_is_paused".format(document.id) + indexing_cache_key = f"document_{document.id}_is_paused" redis_client.delete(indexing_cache_key) # trigger async task recover_document_indexing_task.delay(document.dataset_id, document.id) @@ -1024,7 +1025,7 @@ class DocumentService: def retry_document(dataset_id: str, documents: list[Document]): for document in documents: # add retry flag - retry_indexing_cache_key = "document_{}_is_retried".format(document.id) + retry_indexing_cache_key = f"document_{document.id}_is_retried" cache_result = redis_client.get(retry_indexing_cache_key) if cache_result is not None: raise ValueError("Document is being retried, please try again later") @@ -1041,7 +1042,7 @@ class DocumentService: @staticmethod def sync_website_document(dataset_id: str, document: Document): # add sync flag - sync_indexing_cache_key = "document_{}_is_sync".format(document.id) + sync_indexing_cache_key = f"document_{document.id}_is_sync" cache_result = redis_client.get(sync_indexing_cache_key) if cache_result is not None: raise ValueError("Document is being synced, please try again later") @@ -1174,12 +1175,13 @@ class DocumentService: ) else: logging.warning( - f"Invalid process rule mode: {process_rule.mode}, can not find dataset process rule" + "Invalid process rule mode: %s, can not find dataset process rule", + process_rule.mode, ) return db.session.add(dataset_process_rule) db.session.commit() - lock_name = 
"add_document_lock_dataset_id_{}".format(dataset.id) + lock_name = f"add_document_lock_dataset_id_{dataset.id}" with redis_client.lock(lock_name, timeout=600): position = DocumentService.get_documents_position(dataset.id) document_ids = [] @@ -1862,7 +1864,7 @@ class DocumentService: task_func.delay(*task_args) except Exception as e: # Log the error but do not rollback the transaction - logging.exception(f"Error executing async task for document {update_info['document'].id}") + logging.exception("Error executing async task for document %s", update_info["document"].id) # don't raise the error immediately, but capture it for later propagation_error = e try: @@ -1873,7 +1875,7 @@ class DocumentService: redis_client.setex(indexing_cache_key, 600, 1) except Exception as e: # Log the error but do not rollback the transaction - logging.exception(f"Error setting cache for document {update_info['document'].id}") + logging.exception("Error setting cache for document %s", update_info["document"].id) # Raise any propagation error after all updates if propagation_error: raise propagation_error @@ -2001,7 +2003,7 @@ class SegmentService: ) # calc embedding use tokens tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0] - lock_name = "add_segment_lock_document_id_{}".format(document.id) + lock_name = f"add_segment_lock_document_id_{document.id}" with redis_client.lock(lock_name, timeout=600): max_position = ( db.session.query(func.max(DocumentSegment.position)) @@ -2048,7 +2050,7 @@ class SegmentService: @classmethod def multi_create_segment(cls, segments: list, document: Document, dataset: Dataset): - lock_name = "multi_add_segment_lock_document_id_{}".format(document.id) + lock_name = f"multi_add_segment_lock_document_id_{document.id}" increment_word_count = 0 with redis_client.lock(lock_name, timeout=600): embedding_model = None @@ -2130,7 +2132,7 @@ class SegmentService: @classmethod def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, document: Document, dataset: Dataset): - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise ValueError("Segment is indexing, please try again later") @@ -2300,7 +2302,7 @@ class SegmentService: @classmethod def delete_segment(cls, segment: DocumentSegment, document: Document, dataset: Dataset): - indexing_cache_key = "segment_{}_delete_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_delete_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise ValueError("Segment is deleting.") @@ -2352,7 +2354,7 @@ class SegmentService: return real_deal_segmment_ids = [] for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: continue @@ -2379,7 +2381,7 @@ class SegmentService: return real_deal_segmment_ids = [] for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: continue @@ -2398,7 +2400,7 @@ class SegmentService: def create_child_chunk( cls, content: str, segment: DocumentSegment, document: Document, dataset: Dataset ) -> ChildChunk: - lock_name = 
"add_child_lock_{}".format(segment.id) + lock_name = f"add_child_lock_{segment.id}" with redis_client.lock(lock_name, timeout=20): index_node_id = str(uuid.uuid4()) index_node_hash = helper.generate_text_hash(content) diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 519d5abca5..5a3f504035 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -77,7 +77,7 @@ class HitTestingService: ) end = time.perf_counter() - logging.debug(f"Hit testing retrieve in {end - start:0.4f} seconds") + logging.debug("Hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id @@ -113,7 +113,7 @@ class HitTestingService: ) end = time.perf_counter() - logging.debug(f"External knowledge hit testing retrieve in {end - start:0.4f} seconds") + logging.debug("External knowledge hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id diff --git a/api/services/model_load_balancing_service.py b/api/services/model_load_balancing_service.py index a200cfa146..fe28aa006e 100644 --- a/api/services/model_load_balancing_service.py +++ b/api/services/model_load_balancing_service.py @@ -340,7 +340,7 @@ class ModelLoadBalancingService: config_id = str(config_id) if config_id not in current_load_balancing_configs_dict: - raise ValueError("Invalid load balancing config id: {}".format(config_id)) + raise ValueError(f"Invalid load balancing config id: {config_id}") updated_config_ids.add(config_id) @@ -349,7 +349,7 @@ class ModelLoadBalancingService: # check duplicate name for current_load_balancing_config in current_load_balancing_configs: if current_load_balancing_config.id != config_id and current_load_balancing_config.name == name: - raise ValueError("Load balancing config name {} already exists".format(name)) + raise ValueError(f"Load balancing config name {name} already exists") if credentials: if not isinstance(credentials, dict): @@ -383,7 +383,7 @@ class ModelLoadBalancingService: # check duplicate name for current_load_balancing_config in current_load_balancing_configs: if current_load_balancing_config.name == name: - raise ValueError("Load balancing config name {} already exists".format(name)) + raise ValueError(f"Load balancing config name {name} already exists") if not credentials: raise ValueError("Invalid load balancing config credentials") diff --git a/api/services/model_provider_service.py b/api/services/model_provider_service.py index 0a0a5619e1..54197bf949 100644 --- a/api/services/model_provider_service.py +++ b/api/services/model_provider_service.py @@ -380,7 +380,7 @@ class ModelProviderService: else None ) except Exception as e: - logger.debug(f"get_default_model_of_model_type error: {e}") + logger.debug("get_default_model_of_model_type error: %s", e) return None def update_default_model_of_model_type(self, tenant_id: str, model_type: str, provider: str, model: str) -> None: diff --git a/api/services/ops_service.py b/api/services/ops_service.py index 62f37c1588..7a9db7273e 100644 --- a/api/services/ops_service.py +++ b/api/services/ops_service.py @@ -65,9 +65,7 @@ class OpsService: } ) except Exception: - new_decrypt_tracing_config.update( - {"project_url": "{host}/".format(host=decrypt_tracing_config.get("host"))} - ) + new_decrypt_tracing_config.update({"project_url": 
f"{decrypt_tracing_config.get('host')}/"}) if tracing_provider == "langsmith" and ( "project_url" not in decrypt_tracing_config or not decrypt_tracing_config.get("project_url") @@ -139,7 +137,7 @@ class OpsService: project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider) elif tracing_provider == "langfuse": project_key = OpsTraceManager.get_trace_config_project_key(tracing_config, tracing_provider) - project_url = "{host}/project/{key}".format(host=tracing_config.get("host"), key=project_key) + project_url = f"{tracing_config.get('host')}/project/{project_key}" elif tracing_provider in ("langsmith", "opik"): project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider) else: diff --git a/api/services/plugin/data_migration.py b/api/services/plugin/data_migration.py index 5324036414..7a4f886bf5 100644 --- a/api/services/plugin/data_migration.py +++ b/api/services/plugin/data_migration.py @@ -110,7 +110,7 @@ limit 1000""" ) ) logger.exception( - f"[{processed_count}] Failed to migrate [{table_name}] {record_id} ({provider_name})" + "[%s] Failed to migrate [%s] %s (%s)", processed_count, table_name, record_id, provider_name ) continue @@ -183,7 +183,7 @@ limit 1000""" ) ) logger.exception( - f"[{processed_count}] Failed to migrate [{table_name}] {record_id} ({provider_name})" + "[%s] Failed to migrate [%s] %s (%s)", processed_count, table_name, record_id, provider_name ) continue diff --git a/api/services/plugin/plugin_migration.py b/api/services/plugin/plugin_migration.py index 1806fbcfd6..222d70a317 100644 --- a/api/services/plugin/plugin_migration.py +++ b/api/services/plugin/plugin_migration.py @@ -78,7 +78,7 @@ class PluginMigration: ) ) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) futures = [] @@ -136,7 +136,7 @@ class PluginMigration: try: tenants.append(tenant_id) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) continue futures.append( @@ -273,7 +273,7 @@ class PluginMigration: result.append(ToolProviderID(tool_entity.provider_id).plugin_id) except Exception: - logger.exception(f"Failed to process tool {tool}") + logger.exception("Failed to process tool %s", tool) continue return result @@ -301,7 +301,7 @@ class PluginMigration: plugins: dict[str, str] = {} plugin_ids = [] plugin_not_exist = [] - logger.info(f"Extracting unique plugins from {extracted_plugins}") + logger.info("Extracting unique plugins from %s", extracted_plugins) with open(extracted_plugins) as f: for line in f: data = json.loads(line) @@ -318,7 +318,7 @@ class PluginMigration: else: plugin_not_exist.append(plugin_id) except Exception: - logger.exception(f"Failed to fetch plugin unique identifier for {plugin_id}") + logger.exception("Failed to fetch plugin unique identifier for %s", plugin_id) plugin_not_exist.append(plugin_id) with ThreadPoolExecutor(max_workers=10) as executor: @@ -339,7 +339,7 @@ class PluginMigration: # use a fake tenant id to install all the plugins fake_tenant_id = uuid4().hex - logger.info(f"Installing {len(plugins['plugins'])} plugin instances for fake tenant {fake_tenant_id}") + logger.info("Installing %s plugin instances for fake tenant %s", len(plugins["plugins"]), fake_tenant_id) thread_pool = ThreadPoolExecutor(max_workers=workers) @@ -348,7 +348,7 @@ class PluginMigration: plugin_install_failed.extend(response.get("failed", [])) def 
install(tenant_id: str, plugin_ids: list[str]) -> None: - logger.info(f"Installing {len(plugin_ids)} plugins for tenant {tenant_id}") + logger.info("Installing %s plugins for tenant %s", len(plugin_ids), tenant_id) # fetch plugin already installed installed_plugins = manager.list_plugins(tenant_id) installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] @@ -408,7 +408,7 @@ class PluginMigration: installation = manager.list_plugins(fake_tenant_id) except Exception: - logger.exception(f"Failed to get installation for tenant {fake_tenant_id}") + logger.exception("Failed to get installation for tenant %s", fake_tenant_id) Path(output_file).write_text( json.dumps( @@ -491,7 +491,9 @@ class PluginMigration: else: failed.append(reverse_map[plugin.plugin_unique_identifier]) logger.error( - f"Failed to install plugin {plugin.plugin_unique_identifier}, error: {plugin.message}" + "Failed to install plugin %s, error: %s", + plugin.plugin_unique_identifier, + plugin.message, ) done = True diff --git a/api/services/recommend_app/remote/remote_retrieval.py b/api/services/recommend_app/remote/remote_retrieval.py index 80e1aefc01..85f3a02825 100644 --- a/api/services/recommend_app/remote/remote_retrieval.py +++ b/api/services/recommend_app/remote/remote_retrieval.py @@ -20,7 +20,7 @@ class RemoteRecommendAppRetrieval(RecommendAppRetrievalBase): try: result = self.fetch_recommended_app_detail_from_dify_official(app_id) except Exception as e: - logger.warning(f"fetch recommended app detail from dify official failed: {e}, switch to built-in.") + logger.warning("fetch recommended app detail from dify official failed: %s, switch to built-in.", e) result = BuildInRecommendAppRetrieval.fetch_recommended_app_detail_from_builtin(app_id) return result @@ -28,7 +28,7 @@ class RemoteRecommendAppRetrieval(RecommendAppRetrievalBase): try: result = self.fetch_recommended_apps_from_dify_official(language) except Exception as e: - logger.warning(f"fetch recommended apps from dify official failed: {e}, switch to built-in.") + logger.warning("fetch recommended apps from dify official failed: %s, switch to built-in.", e) result = BuildInRecommendAppRetrieval.fetch_recommended_apps_from_builtin(language) return result diff --git a/api/services/tools/builtin_tools_manage_service.py b/api/services/tools/builtin_tools_manage_service.py index 65f05d2986..841eeb4333 100644 --- a/api/services/tools/builtin_tools_manage_service.py +++ b/api/services/tools/builtin_tools_manage_service.py @@ -337,7 +337,7 @@ class BuiltinToolManageService: max_number = max(numbers) return f"{default_pattern} {max_number + 1}" except Exception as e: - logger.warning(f"Error generating next provider name for {provider}: {str(e)}") + logger.warning("Error generating next provider name for %s: %s", provider, str(e)) # fallback return f"{credential_type.get_name()} 1" diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index 2d192e6f7f..52fbc0979c 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -275,7 +275,7 @@ class ToolTransformService: username = user.name except Exception: - logger.exception(f"failed to get user name for api provider {db_provider.id}") + logger.exception("failed to get user name for api provider %s", db_provider.id) # add provider into providers credentials = db_provider.credentials result = ToolProviderApiEntity( diff --git a/api/tasks/add_document_to_index_task.py 
b/api/tasks/add_document_to_index_task.py index 204c1a4f5b..a2105f8a9d 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -22,19 +22,19 @@ def add_document_to_index_task(dataset_document_id: str): Usage: add_document_to_index_task.delay(dataset_document_id) """ - logging.info(click.style("Start add document to index: {}".format(dataset_document_id), fg="green")) + logging.info(click.style(f"Start add document to index: {dataset_document_id}", fg="green")) start_at = time.perf_counter() dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == dataset_document_id).first() if not dataset_document: - logging.info(click.style("Document not found: {}".format(dataset_document_id), fg="red")) + logging.info(click.style(f"Document not found: {dataset_document_id}", fg="red")) db.session.close() return if dataset_document.indexing_status != "completed": return - indexing_cache_key = "document_{}_indexing".format(dataset_document.id) + indexing_cache_key = f"document_{dataset_document.id}_indexing" try: dataset = dataset_document.dataset @@ -101,9 +101,7 @@ def add_document_to_index_task(dataset_document_id: str): end_at = time.perf_counter() logging.info( - click.style( - "Document added to index: {} latency: {}".format(dataset_document.id, end_at - start_at), fg="green" - ) + click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green") ) except Exception as e: logging.exception("add document to index failed") diff --git a/api/tasks/annotation/add_annotation_to_index_task.py b/api/tasks/annotation/add_annotation_to_index_task.py index 2a93c21abd..e436f00133 100644 --- a/api/tasks/annotation/add_annotation_to_index_task.py +++ b/api/tasks/annotation/add_annotation_to_index_task.py @@ -25,7 +25,7 @@ def add_annotation_to_index_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start build index for annotation: {}".format(annotation_id), fg="green")) + logging.info(click.style(f"Start build index for annotation: {annotation_id}", fg="green")) start_at = time.perf_counter() try: @@ -50,7 +50,7 @@ def add_annotation_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Build index successful for annotation: {} latency: {}".format(annotation_id, end_at - start_at), + f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/annotation/batch_import_annotations_task.py b/api/tasks/annotation/batch_import_annotations_task.py index 6d48f5df89..47dc3ee90e 100644 --- a/api/tasks/annotation/batch_import_annotations_task.py +++ b/api/tasks/annotation/batch_import_annotations_task.py @@ -25,9 +25,9 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id: :param user_id: user_id """ - logging.info(click.style("Start batch import annotation: {}".format(job_id), fg="green")) + logging.info(click.style(f"Start batch import annotation: {job_id}", fg="green")) start_at = time.perf_counter() - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() @@ -85,7 +85,7 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id: except Exception as e: db.session.rollback() 
redis_client.setex(indexing_cache_key, 600, "error") - indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id)) + indexing_error_msg_key = f"app_annotation_batch_import_error_msg_{str(job_id)}" redis_client.setex(indexing_error_msg_key, 600, str(e)) logging.exception("Build index for batch import annotations failed") finally: diff --git a/api/tasks/annotation/delete_annotation_index_task.py b/api/tasks/annotation/delete_annotation_index_task.py index a6657e813a..f016400e16 100644 --- a/api/tasks/annotation/delete_annotation_index_task.py +++ b/api/tasks/annotation/delete_annotation_index_task.py @@ -15,7 +15,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str """ Async delete annotation index task """ - logging.info(click.style("Start delete app annotation index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start delete app annotation index: {app_id}", fg="green")) start_at = time.perf_counter() try: dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type( @@ -35,9 +35,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str except Exception: logging.exception("Delete annotation index failed when annotation deleted.") end_at = time.perf_counter() - logging.info( - click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("Annotation deleted index failed") finally: diff --git a/api/tasks/annotation/disable_annotation_reply_task.py b/api/tasks/annotation/disable_annotation_reply_task.py index 5d5d1d3ad8..0076113ce8 100644 --- a/api/tasks/annotation/disable_annotation_reply_task.py +++ b/api/tasks/annotation/disable_annotation_reply_task.py @@ -16,25 +16,25 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str): """ Async enable annotation reply task """ - logging.info(click.style("Start delete app annotations index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start delete app annotations index: {app_id}", fg="green")) start_at = time.perf_counter() # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() annotations_count = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).count() if not app: - logging.info(click.style("App not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App not found: {app_id}", fg="red")) db.session.close() return app_annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() if not app_annotation_setting: - logging.info(click.style("App annotation setting not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App annotation setting not found: {app_id}", fg="red")) db.session.close() return - disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id)) - disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id)) + disable_app_annotation_key = f"disable_app_annotation_{str(app_id)}" + disable_app_annotation_job_key = f"disable_app_annotation_job_{str(job_id)}" try: dataset = Dataset( @@ -57,13 +57,11 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str): db.session.commit() end_at = time.perf_counter() - 
logging.info( - click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("Annotation batch deleted index failed") redis_client.setex(disable_app_annotation_job_key, 600, "error") - disable_app_annotation_error_key = "disable_app_annotation_error_{}".format(str(job_id)) + disable_app_annotation_error_key = f"disable_app_annotation_error_{str(job_id)}" redis_client.setex(disable_app_annotation_error_key, 600, str(e)) finally: redis_client.delete(disable_app_annotation_key) diff --git a/api/tasks/annotation/enable_annotation_reply_task.py b/api/tasks/annotation/enable_annotation_reply_task.py index 12d10df442..44c65c0783 100644 --- a/api/tasks/annotation/enable_annotation_reply_task.py +++ b/api/tasks/annotation/enable_annotation_reply_task.py @@ -27,19 +27,19 @@ def enable_annotation_reply_task( """ Async enable annotation reply task """ - logging.info(click.style("Start add app annotation to index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start add app annotation to index: {app_id}", fg="green")) start_at = time.perf_counter() # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() if not app: - logging.info(click.style("App not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App not found: {app_id}", fg="red")) db.session.close() return annotations = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).all() - enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id)) - enable_app_annotation_job_key = "enable_app_annotation_job_{}".format(str(job_id)) + enable_app_annotation_key = f"enable_app_annotation_{str(app_id)}" + enable_app_annotation_job_key = f"enable_app_annotation_job_{str(job_id)}" try: documents = [] @@ -68,7 +68,7 @@ def enable_annotation_reply_task( try: old_vector.delete() except Exception as e: - logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red")) + logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red")) annotation_setting.score_threshold = score_threshold annotation_setting.collection_binding_id = dataset_collection_binding.id annotation_setting.updated_user_id = user_id @@ -104,18 +104,16 @@ def enable_annotation_reply_task( try: vector.delete_by_metadata_field("app_id", app_id) except Exception as e: - logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red")) + logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red")) vector.create(documents) db.session.commit() redis_client.setex(enable_app_annotation_job_key, 600, "completed") end_at = time.perf_counter() - logging.info( - click.style("App annotations added to index: {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations added to index: {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("Annotation batch created index failed") redis_client.setex(enable_app_annotation_job_key, 600, "error") - enable_app_annotation_error_key = "enable_app_annotation_error_{}".format(str(job_id)) + enable_app_annotation_error_key = f"enable_app_annotation_error_{str(job_id)}" redis_client.setex(enable_app_annotation_error_key, 600, str(e)) db.session.rollback() finally: 
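The task hunks above and below all apply the same ruff G001/G004 fix: logging calls stop building their messages eagerly with str.format() or f-strings and instead pass a printf-style template plus arguments, so interpolation is deferred until a handler actually emits the record. A minimal sketch of the pattern, assuming a module-level logger; the function name and segment_id value are illustrative and not taken from this diff:

import logging

logger = logging.getLogger(__name__)

def index_segment(segment_id: str) -> None:
    # Eager formatting (what G004 flags): the f-string is rendered even when
    # the record is later dropped by the logger or handler level.
    # logger.debug(f"Start indexing segment {segment_id}")

    # Lazy formatting (what this diff switches to): the template and args are
    # stored on the LogRecord and only merged if the record is emitted.
    logger.debug("Start indexing segment %s", segment_id)

    try:
        raise RuntimeError("boom")
    except Exception:
        # logging.exception appends the traceback automatically; only the
        # message itself needs the lazy-args treatment.
        logger.exception("Indexing failed for segment %s", segment_id)

Messages passed to click.echo(), click.style(), or ValueError, by contrast, are always rendered before the call, which is why the same hunks convert those call sites to plain f-strings rather than %-templates.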
diff --git a/api/tasks/annotation/update_annotation_to_index_task.py b/api/tasks/annotation/update_annotation_to_index_task.py index 596ba829ad..5f11d5aa00 100644 --- a/api/tasks/annotation/update_annotation_to_index_task.py +++ b/api/tasks/annotation/update_annotation_to_index_task.py @@ -25,7 +25,7 @@ def update_annotation_to_index_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start update index for annotation: {}".format(annotation_id), fg="green")) + logging.info(click.style(f"Start update index for annotation: {annotation_id}", fg="green")) start_at = time.perf_counter() try: @@ -51,7 +51,7 @@ def update_annotation_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Build index successful for annotation: {} latency: {}".format(annotation_id, end_at - start_at), + f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 49bff72a96..e64a799146 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -49,7 +49,8 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -61,14 +62,14 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form try: storage.delete(file.key) except Exception: - logging.exception("Delete file failed when document deleted, file_id: {}".format(file.id)) + logging.exception("Delete file failed when document deleted, file_id: %s", file.id) db.session.delete(file) db.session.commit() end_at = time.perf_counter() logging.info( click.style( - "Cleaned documents when documents deleted latency: {}".format(end_at - start_at), + f"Cleaned documents when documents deleted latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index 64df3175e1..714e30acc3 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -1,9 +1,12 @@ import datetime import logging +import tempfile import time import uuid +from pathlib import Path import click +import pandas as pd from celery import shared_task # type: ignore from sqlalchemy import func from sqlalchemy.orm import Session @@ -12,15 +15,17 @@ from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from extensions.ext_database import db from extensions.ext_redis import redis_client +from extensions.ext_storage import storage from libs import helper from models.dataset import Dataset, Document, DocumentSegment +from models.model import UploadFile from services.vector_service import VectorService @shared_task(queue="dataset") def batch_create_segment_to_index_task( job_id: str, - content: list, + upload_file_id: str, dataset_id: str, document_id: str, tenant_id: str, @@ -29,18 +34,18 @@ def batch_create_segment_to_index_task( """ Async batch create segment to index :param job_id: - :param content: + :param upload_file_id: :param dataset_id: :param document_id: :param tenant_id: :param user_id: - Usage: 
batch_create_segment_to_index_task.delay(job_id, content, dataset_id, document_id, tenant_id, user_id) + Usage: batch_create_segment_to_index_task.delay(job_id, upload_file_id, dataset_id, document_id, tenant_id, user_id) """ - logging.info(click.style("Start batch create segment jobId: {}".format(job_id), fg="green")) + logging.info(click.style(f"Start batch create segment jobId: {job_id}", fg="green")) start_at = time.perf_counter() - indexing_cache_key = "segment_batch_import_{}".format(job_id) + indexing_cache_key = f"segment_batch_import_{job_id}" try: with Session(db.engine) as session: @@ -58,6 +63,29 @@ def batch_create_segment_to_index_task( or dataset_document.indexing_status != "completed" ): raise ValueError("Document is not available.") + + upload_file = session.get(UploadFile, upload_file_id) + if not upload_file: + raise ValueError("UploadFile not found.") + + with tempfile.TemporaryDirectory() as temp_dir: + suffix = Path(upload_file.key).suffix + # FIXME mypy: Cannot determine type of 'tempfile._get_candidate_names' better not use it here + file_path = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}" # type: ignore + storage.download(upload_file.key, file_path) + + # Skip the first row + df = pd.read_csv(file_path) + content = [] + for index, row in df.iterrows(): + if dataset_document.doc_form == "qa_model": + data = {"content": row.iloc[0], "answer": row.iloc[1]} + else: + data = {"content": row.iloc[0]} + content.append(data) + if len(content) == 0: + raise ValueError("The CSV file is empty.") + document_segments = [] embedding_model = None if dataset.indexing_technique == "high_quality": @@ -115,7 +143,7 @@ def batch_create_segment_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Segment batch created job: {} latency: {}".format(job_id, end_at - start_at), + f"Segment batch created job: {job_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index fad090141a..fe6d613b1c 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -42,7 +42,7 @@ def clean_dataset_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start clean dataset when dataset deleted: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Start clean dataset when dataset deleted: {dataset_id}", fg="green")) start_at = time.perf_counter() try: @@ -57,9 +57,9 @@ def clean_dataset_task( segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all() if documents is None or len(documents) == 0: - logging.info(click.style("No documents found for dataset: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green")) else: - logging.info(click.style("Cleaning documents for dataset: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Cleaning documents for dataset: {dataset_id}", fg="green")) # Specify the index type before initializing the index processor if doc_form is None: raise ValueError("Index type must be specified.") @@ -80,7 +80,8 @@ def clean_dataset_task( except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -115,9 +116,7 @@ def clean_dataset_task( db.session.commit() 
end_at = time.perf_counter() logging.info( - click.style( - "Cleaned dataset when dataset deleted: {} latency: {}".format(dataset_id, end_at - start_at), fg="green" - ) + click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green") ) except Exception: logging.exception("Cleaned dataset when dataset deleted failed") diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index dd7a544ff5..d690106d17 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -24,7 +24,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i Usage: clean_document_task.delay(document_id, dataset_id) """ - logging.info(click.style("Start clean document when document deleted: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start clean document when document deleted: {document_id}", fg="green")) start_at = time.perf_counter() try: @@ -51,7 +51,8 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -63,7 +64,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i try: storage.delete(file.key) except Exception: - logging.exception("Delete file failed when document deleted, file_id: {}".format(file_id)) + logging.exception("Delete file failed when document deleted, file_id: %s", file_id) db.session.delete(file) db.session.commit() @@ -77,7 +78,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i end_at = time.perf_counter() logging.info( click.style( - "Cleaned document when document deleted: {} latency: {}".format(document_id, end_at - start_at), + f"Cleaned document when document deleted: {document_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index 0f72f87f15..bf1a92f038 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -19,7 +19,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): Usage: clean_notion_document_task.delay(document_ids, dataset_id) """ logging.info( - click.style("Start clean document when import form notion document deleted: {}".format(dataset_id), fg="green") + click.style(f"Start clean document when import form notion document deleted: {dataset_id}", fg="green") ) start_at = time.perf_counter() diff --git a/api/tasks/create_segment_to_index_task.py b/api/tasks/create_segment_to_index_task.py index 5eda24674a..a8839ffc17 100644 --- a/api/tasks/create_segment_to_index_task.py +++ b/api/tasks/create_segment_to_index_task.py @@ -21,19 +21,19 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] :param keywords: Usage: create_segment_to_index_task.delay(segment_id) """ - logging.info(click.style("Start create segment to index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start create segment to index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: 
{segment_id}", fg="red")) db.session.close() return if segment.status != "waiting": return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: # update segment status to indexing @@ -57,17 +57,17 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_type = dataset.doc_form @@ -84,9 +84,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] db.session.commit() end_at = time.perf_counter() - logging.info( - click.style("Segment created to index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment created to index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("create segment to index failed") segment.enabled = False diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index 7478bf5a90..8c4c1876ad 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -20,7 +20,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): :param action: action Usage: deal_dataset_vector_index_task.delay(dataset_id, action) """ - logging.info(click.style("Start deal dataset vector index: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Start deal dataset vector index: {dataset_id}", fg="green")) start_at = time.perf_counter() try: @@ -162,9 +162,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): index_processor.clean(dataset, None, with_keywords=False, delete_child_chunks=False) end_at = time.perf_counter() - logging.info( - click.style("Deal dataset vector index: {} latency: {}".format(dataset_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Deal dataset vector index: {dataset_id} latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("Deal dataset vector index failed") finally: diff --git a/api/tasks/delete_account_task.py b/api/tasks/delete_account_task.py index d3b33e3052..ef50adf8d5 100644 --- a/api/tasks/delete_account_task.py +++ b/api/tasks/delete_account_task.py @@ -16,11 +16,11 @@ def delete_account_task(account_id): try: BillingService.delete_account(account_id) except Exception as e: - logger.exception(f"Failed to delete account {account_id} from billing service.") + logger.exception("Failed to delete account %s from billing service.", account_id) raise if not account: - logger.error(f"Account {account_id} not found.") + logger.error("Account %s not found.", account_id) return # send success email send_deletion_success_task.delay(account.email) diff --git 
a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py index 66ff0f9a0a..da12355d23 100644 --- a/api/tasks/delete_segment_from_index_task.py +++ b/api/tasks/delete_segment_from_index_task.py @@ -38,7 +38,7 @@ def delete_segment_from_index_task(index_node_ids: list, dataset_id: str, docume index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) end_at = time.perf_counter() - logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segment deleted from index latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("delete segment from index failed") finally: diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index e67ba5c76e..fa4ec15f8a 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -18,37 +18,37 @@ def disable_segment_from_index_task(segment_id: str): Usage: disable_segment_from_index_task.delay(segment_id) """ - logging.info(click.style("Start disable segment from index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start disable segment from index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: {segment_id}", fg="red")) db.session.close() return if segment.status != "completed": - logging.info(click.style("Segment is not completed, disable is not allowed: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red")) db.session.close() return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_type = dataset_document.doc_form @@ -56,9 +56,7 @@ def disable_segment_from_index_task(segment_id: str): index_processor.clean(dataset, [segment.index_node_id]) end_at = time.perf_counter() - logging.info( - click.style("Segment removed from index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment removed from index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("remove segment from index failed") segment.enabled = True diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 0c8b1aabc7..f033f05084 100644 --- 
a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -25,18 +25,18 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan")) db.session.close() return dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first() if not dataset_document: - logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan")) db.session.close() return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan")) db.session.close() return # sync index processor @@ -61,7 +61,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) end_at = time.perf_counter() - logging.info(click.style("Segments removed from index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segments removed from index latency: {end_at - start_at}", fg="green")) except Exception: # update segment error msg db.session.query(DocumentSegment).where( @@ -78,6 +78,6 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen db.session.commit() finally: for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" redis_client.delete(indexing_cache_key) db.session.close() diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index dcc748ef18..56f330b964 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -22,13 +22,13 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): Usage: document_indexing_sync_task.delay(dataset_id, document_id) """ - logging.info(click.style("Start sync document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start sync document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -108,10 +108,8 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): indexing_runner = IndexingRunner() indexing_runner.run([document]) end_at = time.perf_counter() - logging.info( - click.style("update document: {} latency: {}".format(document.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("document_indexing_sync_task failed, document_id: {}".format(document_id)) + 
logging.exception("document_indexing_sync_task failed, document_id: %s", document_id) diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index ec6d10d93b..728db2e2dc 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -26,7 +26,7 @@ def document_indexing_task(dataset_id: str, document_ids: list): dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow")) + logging.info(click.style(f"Dataset is not found: {dataset_id}", fg="yellow")) db.session.close() return # check document limit @@ -60,7 +60,7 @@ def document_indexing_task(dataset_id: str, document_ids: list): return for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() @@ -77,10 +77,10 @@ def document_indexing_task(dataset_id: str, document_ids: list): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("Document indexing task failed, dataset_id: {}".format(dataset_id)) + logging.exception("Document indexing task failed, dataset_id: %s", dataset_id) finally: db.session.close() diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index e53c38ddc3..053c0c5f41 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -20,13 +20,13 @@ def document_indexing_update_task(dataset_id: str, document_id: str): Usage: document_indexing_update_task.delay(dataset_id, document_id) """ - logging.info(click.style("Start update document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start update document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -69,10 +69,10 @@ def document_indexing_update_task(dataset_id: str, document_id: str): indexing_runner = IndexingRunner() indexing_runner.run([document]) end_at = time.perf_counter() - logging.info(click.style("update document: {} latency: {}".format(document.id, end_at - start_at), fg="green")) + logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("document_indexing_update_task failed, document_id: {}".format(document_id)) + logging.exception("document_indexing_update_task failed, document_id: %s", document_id) finally: db.session.close() diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 
b3ddface59..faa7e2b8d0 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -27,7 +27,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if dataset is None: - logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red")) + logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red")) db.session.close() return @@ -63,7 +63,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): db.session.close() for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() @@ -95,10 +95,10 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("duplicate_document_indexing_task failed, dataset_id: {}".format(dataset_id)) + logging.exception("duplicate_document_indexing_task failed, dataset_id: %s", dataset_id) finally: db.session.close() diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index 13822f078e..f801c9d9ee 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -21,21 +21,21 @@ def enable_segment_to_index_task(segment_id: str): Usage: enable_segment_to_index_task.delay(segment_id) """ - logging.info(click.style("Start enable segment to index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start enable segment to index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: {segment_id}", fg="red")) db.session.close() return if segment.status != "completed": - logging.info(click.style("Segment is not completed, enable is not allowed: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment is not completed, enable is not allowed: {segment_id}", fg="red")) db.session.close() return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: document = Document( @@ -51,17 +51,17 @@ def enable_segment_to_index_task(segment_id: str): dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or 
dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor() @@ -85,9 +85,7 @@ def enable_segment_to_index_task(segment_id: str): index_processor.load(dataset, [document]) end_at = time.perf_counter() - logging.info( - click.style("Segment enabled to index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("enable segment to index failed") segment.enabled = False diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index e3fdf04d8c..777380631f 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -27,17 +27,17 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i start_at = time.perf_counter() dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan")) return dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first() if not dataset_document: - logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan")) db.session.close() return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan")) db.session.close() return # sync index processor @@ -53,7 +53,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i .all() ) if not segments: - logging.info(click.style("Segments not found: {}".format(segment_ids), fg="cyan")) + logging.info(click.style(f"Segments not found: {segment_ids}", fg="cyan")) db.session.close() return @@ -91,7 +91,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i index_processor.load(dataset, documents) end_at = time.perf_counter() - logging.info(click.style("Segments enabled to index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("enable segments to index failed") # update segment error msg @@ -110,6 +110,6 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i db.session.commit() finally: for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" redis_client.delete(indexing_cache_key) db.session.close() diff --git a/api/tasks/mail_account_deletion_task.py b/api/tasks/mail_account_deletion_task.py index a6f8ce2f0b..38b5ca1800 100644 --- a/api/tasks/mail_account_deletion_task.py +++ b/api/tasks/mail_account_deletion_task.py @@ -37,12 +37,10 @@ def 
send_deletion_success_task(to: str, language: str = "en-US") -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send account deletion success email to {}: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send account deletion success email to {to}: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send account deletion success email to {} failed".format(to)) + logging.exception("Send account deletion success email to %s failed", to) @shared_task(queue="mail") @@ -83,4 +81,4 @@ def send_account_deletion_verification_code(to: str, code: str, language: str = ) ) except Exception: - logging.exception("Send account deletion verification code email to {} failed".format(to)) + logging.exception("Send account deletion verification code email to %s failed", to) diff --git a/api/tasks/mail_change_mail_task.py b/api/tasks/mail_change_mail_task.py index ea1875901c..054053558d 100644 --- a/api/tasks/mail_change_mail_task.py +++ b/api/tasks/mail_change_mail_task.py @@ -5,7 +5,7 @@ import click from celery import shared_task # type: ignore from extensions.ext_mail import mail -from libs.email_i18n import get_email_i18n_service +from libs.email_i18n import EmailType, get_email_i18n_service @shared_task(queue="mail") @@ -22,7 +22,7 @@ def send_change_mail_task(language: str, to: str, code: str, phase: str) -> None if not mail.is_inited(): return - logging.info(click.style("Start change email mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start change email mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -34,9 +34,45 @@ def send_change_mail_task(language: str, to: str, code: str, phase: str) -> None phase=phase, ) + end_at = time.perf_counter() + logging.info(click.style(f"Send change email mail to {to} succeeded: latency: {end_at - start_at}", fg="green")) + except Exception: + logging.exception("Send change email mail to %s failed", to) + + +@shared_task(queue="mail") +def send_change_mail_completed_notification_task(language: str, to: str) -> None: + """ + Send change email completed notification with internationalization support. 
+ + Args: + language: Language code for email localization + to: Recipient email address + """ + if not mail.is_inited(): + return + + logging.info(click.style(f"Start change email completed notify mail to {to}", fg="green")) + start_at = time.perf_counter() + + try: + email_service = get_email_i18n_service() + email_service.send_email( + email_type=EmailType.CHANGE_EMAIL_COMPLETED, + language_code=language, + to=to, + template_context={ + "to": to, + "email": to, + }, + ) + end_at = time.perf_counter() logging.info( - click.style("Send change email mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green") + click.style( + f"Send change email completed mail to {to} succeeded: latency: {end_at - start_at}", + fg="green", + ) ) except Exception: - logging.exception("Send change email mail to {} failed".format(to)) + logging.exception("Send change email completed mail to %s failed", to) diff --git a/api/tasks/mail_email_code_login.py b/api/tasks/mail_email_code_login.py index 34220784e9..a82ab55384 100644 --- a/api/tasks/mail_email_code_login.py +++ b/api/tasks/mail_email_code_login.py @@ -21,7 +21,7 @@ def send_email_code_login_mail_task(language: str, to: str, code: str) -> None: if not mail.is_inited(): return - logging.info(click.style("Start email code login mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start email code login mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -38,9 +38,7 @@ def send_email_code_login_mail_task(language: str, to: str, code: str) -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send email code login mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send email code login mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send email code login mail to {} failed".format(to)) + logging.exception("Send email code login mail to %s failed", to) diff --git a/api/tasks/mail_enterprise_task.py b/api/tasks/mail_enterprise_task.py index a1c2908624..9c80da06e5 100644 --- a/api/tasks/mail_enterprise_task.py +++ b/api/tasks/mail_enterprise_task.py @@ -15,7 +15,7 @@ def send_enterprise_email_task(to: list[str], subject: str, body: str, substitut if not mail.is_inited(): return - logging.info(click.style("Start enterprise mail to {} with subject {}".format(to, subject), fg="green")) + logging.info(click.style(f"Start enterprise mail to {to} with subject {subject}", fg="green")) start_at = time.perf_counter() try: @@ -25,8 +25,6 @@ def send_enterprise_email_task(to: list[str], subject: str, body: str, substitut email_service.send_raw_email(to=to, subject=subject, html_content=html_content) end_at = time.perf_counter() - logging.info( - click.style("Send enterprise mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Send enterprise mail to {to} succeeded: latency: {end_at - start_at}", fg="green")) except Exception: - logging.exception("Send enterprise mail to {} failed".format(to)) + logging.exception("Send enterprise mail to %s failed", to) diff --git a/api/tasks/mail_invite_member_task.py b/api/tasks/mail_invite_member_task.py index 8c73de0111..ff351f08af 100644 --- a/api/tasks/mail_invite_member_task.py +++ b/api/tasks/mail_invite_member_task.py @@ -24,9 +24,7 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam if not mail.is_inited(): return - logging.info( - click.style("Start send invite member mail to 
{} in workspace {}".format(to, workspace_name), fg="green") - ) + logging.info(click.style(f"Start send invite member mail to {to} in workspace {workspace_name}", fg="green")) start_at = time.perf_counter() try: @@ -46,9 +44,7 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam end_at = time.perf_counter() logging.info( - click.style( - "Send invite member mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send invite member mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send invite member mail to {} failed".format(to)) + logging.exception("Send invite member mail to %s failed", to) diff --git a/api/tasks/mail_owner_transfer_task.py b/api/tasks/mail_owner_transfer_task.py index e566a6bc56..3856bf294a 100644 --- a/api/tasks/mail_owner_transfer_task.py +++ b/api/tasks/mail_owner_transfer_task.py @@ -22,7 +22,7 @@ def send_owner_transfer_confirm_task(language: str, to: str, code: str, workspac if not mail.is_inited(): return - logging.info(click.style("Start owner transfer confirm mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start owner transfer confirm mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -41,12 +41,12 @@ def send_owner_transfer_confirm_task(language: str, to: str, code: str, workspac end_at = time.perf_counter() logging.info( click.style( - "Send owner transfer confirm mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send owner transfer confirm mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("owner transfer confirm email mail to {} failed".format(to)) + logging.exception("owner transfer confirm email mail to %s failed", to) @shared_task(queue="mail") @@ -63,7 +63,7 @@ def send_old_owner_transfer_notify_email_task(language: str, to: str, workspace: if not mail.is_inited(): return - logging.info(click.style("Start old owner transfer notify mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start old owner transfer notify mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -82,12 +82,12 @@ def send_old_owner_transfer_notify_email_task(language: str, to: str, workspace: end_at = time.perf_counter() logging.info( click.style( - "Send old owner transfer notify mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send old owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("old owner transfer notify email mail to {} failed".format(to)) + logging.exception("old owner transfer notify email mail to %s failed", to) @shared_task(queue="mail") @@ -103,7 +103,7 @@ def send_new_owner_transfer_notify_email_task(language: str, to: str, workspace: if not mail.is_inited(): return - logging.info(click.style("Start new owner transfer notify mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start new owner transfer notify mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -121,9 +121,9 @@ def send_new_owner_transfer_notify_email_task(language: str, to: str, workspace: end_at = time.perf_counter() logging.info( click.style( - "Send new owner transfer notify mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send new owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("new owner transfer notify 
email mail to {} failed".format(to)) + logging.exception("new owner transfer notify email mail to %s failed", to) diff --git a/api/tasks/mail_reset_password_task.py b/api/tasks/mail_reset_password_task.py index e2482f2101..b01af7827b 100644 --- a/api/tasks/mail_reset_password_task.py +++ b/api/tasks/mail_reset_password_task.py @@ -21,7 +21,7 @@ def send_reset_password_mail_task(language: str, to: str, code: str) -> None: if not mail.is_inited(): return - logging.info(click.style("Start password reset mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start password reset mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -38,9 +38,7 @@ def send_reset_password_mail_task(language: str, to: str, code: str) -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send password reset mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send password reset mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send password reset mail to {} failed".format(to)) + logging.exception("Send password reset mail to %s failed", to) diff --git a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py index 2e77332ffe..c7e0047664 100644 --- a/api/tasks/ops_trace_task.py +++ b/api/tasks/ops_trace_task.py @@ -43,13 +43,11 @@ def process_trace_tasks(file_info): if trace_type: trace_info = trace_type(**trace_info) trace_instance.trace(trace_info) - logging.info(f"Processing trace tasks success, app_id: {app_id}") + logging.info("Processing trace tasks success, app_id: %s", app_id) except Exception as e: - logging.info( - f"error:\n\n\n{e}\n\n\n\n", - ) + logging.info("error:\n\n\n%s\n\n\n\n", e) failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}" redis_client.incr(failed_key) - logging.info(f"Processing trace tasks failed, app_id: {app_id}") + logging.info("Processing trace tasks failed, app_id: %s", app_id) finally: storage.delete(file_path) diff --git a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py index 6fcdad0525..9ea6aa6214 100644 --- a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py +++ b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @@ -58,7 +58,7 @@ def process_tenant_plugin_autoupgrade_check_task( click.echo( click.style( - "Checking upgradable plugin for tenant: {}".format(tenant_id), + f"Checking upgradable plugin for tenant: {tenant_id}", fg="green", ) ) @@ -68,7 +68,7 @@ def process_tenant_plugin_autoupgrade_check_task( # get plugin_ids to check plugin_ids: list[tuple[str, str, str]] = [] # plugin_id, version, unique_identifier - click.echo(click.style("Upgrade mode: {}".format(upgrade_mode), fg="green")) + click.echo(click.style(f"Upgrade mode: {upgrade_mode}", fg="green")) if upgrade_mode == TenantPluginAutoUpgradeStrategy.UpgradeMode.PARTIAL and include_plugins: all_plugins = manager.list_plugins(tenant_id) @@ -142,7 +142,7 @@ def process_tenant_plugin_autoupgrade_check_task( marketplace.record_install_plugin_event(new_unique_identifier) click.echo( click.style( - "Upgrade plugin: {} -> {}".format(original_unique_identifier, new_unique_identifier), + f"Upgrade plugin: {original_unique_identifier} -> {new_unique_identifier}", fg="green", ) ) @@ -156,11 +156,11 @@ def process_tenant_plugin_autoupgrade_check_task( }, ) except Exception as e: - click.echo(click.style("Error when upgrading plugin: {}".format(e), fg="red")) + click.echo(click.style(f"Error when upgrading 
plugin: {e}", fg="red")) traceback.print_exc() break except Exception as e: - click.echo(click.style("Error when checking upgradable plugin: {}".format(e), fg="red")) + click.echo(click.style(f"Error when checking upgradable plugin: {e}", fg="red")) traceback.print_exc() return diff --git a/api/tasks/recover_document_indexing_task.py b/api/tasks/recover_document_indexing_task.py index dfb2389579..ff489340cd 100644 --- a/api/tasks/recover_document_indexing_task.py +++ b/api/tasks/recover_document_indexing_task.py @@ -18,13 +18,13 @@ def recover_document_indexing_task(dataset_id: str, document_id: str): Usage: recover_document_indexing_task.delay(dataset_id, document_id) """ - logging.info(click.style("Recover document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Recover document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -37,12 +37,10 @@ def recover_document_indexing_task(dataset_id: str, document_id: str): elif document.indexing_status == "indexing": indexing_runner.run_in_indexing_status(document) end_at = time.perf_counter() - logging.info( - click.style("Processed document: {} latency: {}".format(document.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Processed document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("recover_document_indexing_task failed, document_id: {}".format(document_id)) + logging.exception("recover_document_indexing_task failed, document_id: %s", document_id) finally: db.session.close() diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index 1619f8c546..b6f772dd60 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -201,7 +201,7 @@ def _delete_app_workflow_runs(tenant_id: str, app_id: str): batch_size=1000, ) - logging.info(f"Deleted {deleted_count} workflow runs for app {app_id}") + logging.info("Deleted %s workflow runs for app %s", deleted_count, app_id) def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): @@ -215,7 +215,7 @@ def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): batch_size=1000, ) - logging.info(f"Deleted {deleted_count} workflow node executions for app {app_id}") + logging.info("Deleted %s workflow node executions for app %s", deleted_count, app_id) def _delete_app_workflow_app_logs(tenant_id: str, app_id: str): @@ -342,6 +342,6 @@ def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: s db.session.commit() logging.info(click.style(f"Deleted {name} {record_id}", fg="green")) except Exception: - logging.exception(f"Error occurred while deleting {name} {record_id}") + logging.exception("Error occurred while deleting %s %s", name, record_id) continue rs.close() diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 3f73cc7b40..524130a297 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -19,21 +19,21 @@ def 
remove_document_from_index_task(document_id: str): Usage: remove_document_from_index.delay(document_id) """ - logging.info(click.style("Start remove document segments from index: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start remove document segments from index: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return if document.indexing_status != "completed": - logging.info(click.style("Document is not completed, remove is not allowed: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document is not completed, remove is not allowed: {document_id}", fg="red")) db.session.close() return - indexing_cache_key = "document_{}_indexing".format(document.id) + indexing_cache_key = f"document_{document.id}_indexing" try: dataset = document.dataset @@ -49,7 +49,7 @@ def remove_document_from_index_task(document_id: str): try: index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) except Exception: - logging.exception(f"clean dataset {dataset.id} from index failed") + logging.exception("clean dataset %s from index failed", dataset.id) # update segment to disable db.session.query(DocumentSegment).where(DocumentSegment.document_id == document.id).update( { @@ -63,9 +63,7 @@ def remove_document_from_index_task(document_id: str): end_at = time.perf_counter() logging.info( - click.style( - "Document removed from index: {} latency: {}".format(document.id, end_at - start_at), fg="green" - ) + click.style(f"Document removed from index: {document.id} latency: {end_at - start_at}", fg="green") ) except Exception: logging.exception("remove document from index failed") diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 58f0156afb..a868cb500b 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -27,12 +27,12 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red")) + logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red")) db.session.close() return tenant_id = dataset.tenant_id for document_id in document_ids: - retry_indexing_cache_key = "document_{}_is_retried".format(document_id) + retry_indexing_cache_key = f"document_{document_id}_is_retried" # check document limit features = FeatureService.get_features(tenant_id) try: @@ -57,12 +57,12 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): db.session.close() return - logging.info(click.style("Start retry document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start retry document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() ) if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="yellow")) + logging.info(click.style(f"Document not found: {document_id}", fg="yellow")) db.session.close() return try: @@ -95,8 +95,8 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): 
db.session.commit() logging.info(click.style(str(ex), fg="yellow")) redis_client.delete(retry_indexing_cache_key) - logging.exception("retry_document_indexing_task failed, document_id: {}".format(document_id)) + logging.exception("retry_document_indexing_task failed, document_id: %s", document_id) finally: db.session.close() end_at = time.perf_counter() - logging.info(click.style("Retry dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Retry dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index 539c2db80f..f112a97d2f 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ b/api/tasks/sync_website_document_indexing_task.py @@ -28,7 +28,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): if dataset is None: raise ValueError("Dataset not found") - sync_indexing_cache_key = "document_{}_is_sync".format(document_id) + sync_indexing_cache_key = f"document_{document_id}_is_sync" # check document limit features = FeatureService.get_features(dataset.tenant_id) try: @@ -52,10 +52,10 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): redis_client.delete(sync_indexing_cache_key) return - logging.info(click.style("Start sync website document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start sync website document: {document_id}", fg="green")) document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="yellow")) + logging.info(click.style(f"Document not found: {document_id}", fg="yellow")) return try: # clean old data @@ -87,6 +87,6 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): db.session.commit() logging.info(click.style(str(ex), fg="yellow")) redis_client.delete(sync_indexing_cache_key) - logging.exception("sync_website_document_indexing_task failed, document_id: {}".format(document_id)) + logging.exception("sync_website_document_indexing_task failed, document_id: %s", document_id) end_at = time.perf_counter() - logging.info(click.style("Sync document: {} latency: {}".format(document_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Sync document: {document_id} latency: {end_at - start_at}", fg="green")) diff --git a/api/templates/change_mail_completed_template_en-US.html b/api/templates/change_mail_completed_template_en-US.html new file mode 100644 index 0000000000..ecaf35868d --- /dev/null +++ b/api/templates/change_mail_completed_template_en-US.html @@ -0,0 +1,135 @@ + + + +
[HTML template bodies (markup not recoverable): the new change-mail-completed emails read "Your login email has been changed" / "You can now log into Dify with your new email address:" / "If you did not make this change, email support@dify.ai.", with matching Simplified Chinese copies; the self-hosted variants address {{application_title}} instead of Dify and close with "If you did not make this change, please ignore this email or contact support immediately."]
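The hunks above apply one convention throughout: bare logging messages take lazy %-style arguments (the flake8-logging-format style, so interpolation is deferred until a record is actually emitted), while f-strings remain acceptable when the string is rendered before it reaches the logger, e.g. inside click.style() or click.echo(). A minimal, hypothetical task is sketched below to summarize that split; send_example_mail_task, _deliver, and the message text are illustrative only and not taken from the codebase.

import logging
import time

import click

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _deliver(to: str) -> None:
    # Stand-in for the real mail provider call.
    print(f"(pretend) delivering mail to {to}")


def send_example_mail_task(to: str) -> None:
    start_at = time.perf_counter()
    # An f-string is fine here: click.style() formats the text before the logger sees it,
    # so the message argument to logger.info() is not an f-string literal.
    logger.info(click.style(f"Start example mail to {to}", fg="green"))
    try:
        _deliver(to)
        end_at = time.perf_counter()
        logger.info(click.style(f"Send example mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
    except Exception:
        # Bare logging message: pass lazy %-style arguments instead of str.format()/f-strings,
        # and use logger.exception() so the traceback is recorded with the message.
        logger.exception("Send example mail to %s failed", to)


if __name__ == "__main__":
    send_example_mail_task("user@example.com")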