diff --git a/api/README.md b/api/README.md
index 70ca2e86a8..bab33f9293 100644
--- a/api/README.md
+++ b/api/README.md
@@ -65,14 +65,12 @@
8. Start Dify [web](../web) service.
9. Setup your application by visiting `http://localhost:3000`...
-10. If you need to debug local async processing, please start the worker service.
+10. If you need to handle and debug async tasks (e.g. dataset importing and document indexing), please start the worker service.
```bash
poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion
```
- The started celery app handles the async tasks, e.g. dataset importing and documents indexing.
-
## Testing
1. Install dependencies for both the backend and the test environment
diff --git a/api/commands.py b/api/commands.py
index 3a6b4963cf..7ef4aed7f7 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -28,28 +28,28 @@ from services.account_service import RegisterService, TenantService
@click.command("reset-password", help="Reset the account password.")
-@click.option("--email", prompt=True, help="The email address of the account whose password you need to reset")
-@click.option("--new-password", prompt=True, help="the new password.")
-@click.option("--password-confirm", prompt=True, help="the new password confirm.")
+@click.option("--email", prompt=True, help="Account email to reset password for")
+@click.option("--new-password", prompt=True, help="New password")
+@click.option("--password-confirm", prompt=True, help="Confirm new password")
def reset_password(email, new_password, password_confirm):
"""
Reset password of owner account
Only available in SELF_HOSTED mode
"""
if str(new_password).strip() != str(password_confirm).strip():
- click.echo(click.style("sorry. The two passwords do not match.", fg="red"))
+ click.echo(click.style("Passwords do not match.", fg="red"))
return
account = db.session.query(Account).filter(Account.email == email).one_or_none()
if not account:
- click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red"))
+ click.echo(click.style("Account not found for email: {}".format(email), fg="red"))
return
try:
valid_password(new_password)
except:
- click.echo(click.style("sorry. The passwords must match {} ".format(password_pattern), fg="red"))
+ click.echo(click.style("Invalid password. Must match {}".format(password_pattern), fg="red"))
return
# generate password salt
@@ -62,37 +62,37 @@ def reset_password(email, new_password, password_confirm):
account.password = base64_password_hashed
account.password_salt = base64_salt
db.session.commit()
- click.echo(click.style("Congratulations! Password has been reset.", fg="green"))
+ click.echo(click.style("Password reset successfully.", fg="green"))
@click.command("reset-email", help="Reset the account email.")
-@click.option("--email", prompt=True, help="The old email address of the account whose email you need to reset")
-@click.option("--new-email", prompt=True, help="the new email.")
-@click.option("--email-confirm", prompt=True, help="the new email confirm.")
+@click.option("--email", prompt=True, help="Current account email")
+@click.option("--new-email", prompt=True, help="New email")
+@click.option("--email-confirm", prompt=True, help="Confirm new email")
def reset_email(email, new_email, email_confirm):
"""
Replace account email
:return:
"""
if str(new_email).strip() != str(email_confirm).strip():
- click.echo(click.style("Sorry, new email and confirm email do not match.", fg="red"))
+ click.echo(click.style("New emails do not match.", fg="red"))
return
account = db.session.query(Account).filter(Account.email == email).one_or_none()
if not account:
- click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red"))
+ click.echo(click.style("Account not found for email: {}".format(email), fg="red"))
return
try:
email_validate(new_email)
except:
- click.echo(click.style("sorry. {} is not a valid email. ".format(email), fg="red"))
+ click.echo(click.style("Invalid email: {}".format(new_email), fg="red"))
return
account.email = new_email
db.session.commit()
- click.echo(click.style("Congratulations!, email has been reset.", fg="green"))
+ click.echo(click.style("Email updated successfully.", fg="green"))
@click.command(
@@ -104,7 +104,7 @@ def reset_email(email, new_email, email_confirm):
)
@click.confirmation_option(
prompt=click.style(
- "Are you sure you want to reset encrypt key pair? this operation cannot be rolled back!", fg="red"
+ "Are you sure you want to reset encrypt key pair? This operation cannot be rolled back!", fg="red"
)
)
def reset_encrypt_key_pair():
@@ -114,13 +114,13 @@ def reset_encrypt_key_pair():
Only support SELF_HOSTED mode.
"""
if dify_config.EDITION != "SELF_HOSTED":
- click.echo(click.style("Sorry, only support SELF_HOSTED mode.", fg="red"))
+ click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red"))
return
tenants = db.session.query(Tenant).all()
for tenant in tenants:
if not tenant:
- click.echo(click.style("Sorry, no workspace found. Please enter /install to initialize.", fg="red"))
+ click.echo(click.style("No workspaces found. Run /install first.", fg="red"))
return
tenant.encrypt_public_key = generate_key_pair(tenant.id)
@@ -137,7 +137,7 @@ def reset_encrypt_key_pair():
)
-@click.command("vdb-migrate", help="migrate vector db.")
+@click.command("vdb-migrate", help="Migrate vector db.")
@click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate, Default is All.")
def vdb_migrate(scope: str):
if scope in {"knowledge", "all"}:
@@ -150,7 +150,7 @@ def migrate_annotation_vector_database():
"""
Migrate annotation datas to target vector database .
"""
- click.echo(click.style("Start migrate annotation data.", fg="green"))
+ click.echo(click.style("Starting annotation data migration.", fg="green"))
create_count = 0
skipped_count = 0
total_count = 0
@@ -174,14 +174,14 @@ def migrate_annotation_vector_database():
f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped."
)
try:
- click.echo("Create app annotation index: {}".format(app.id))
+ click.echo("Creating app annotation index: {}".format(app.id))
app_annotation_setting = (
db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app.id).first()
)
if not app_annotation_setting:
skipped_count = skipped_count + 1
- click.echo("App annotation setting is disabled: {}".format(app.id))
+ click.echo("App annotation setting disabled: {}".format(app.id))
continue
# get dataset_collection_binding info
dataset_collection_binding = (
@@ -190,7 +190,7 @@ def migrate_annotation_vector_database():
.first()
)
if not dataset_collection_binding:
- click.echo("App annotation collection binding is not exist: {}".format(app.id))
+ click.echo("App annotation collection binding not found: {}".format(app.id))
continue
annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app.id).all()
dataset = Dataset(
@@ -211,11 +211,11 @@ def migrate_annotation_vector_database():
documents.append(document)
vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
- click.echo(f"Start to migrate annotation, app_id: {app.id}.")
+ click.echo(f"Migrating annotations for app: {app.id}.")
try:
vector.delete()
- click.echo(click.style(f"Successfully delete vector index for app: {app.id}.", fg="green"))
+ click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green"))
except Exception as e:
click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red"))
raise e
@@ -223,12 +223,12 @@ def migrate_annotation_vector_database():
try:
click.echo(
click.style(
- f"Start to created vector index with {len(documents)} annotations for app {app.id}.",
+ f"Creating vector index with {len(documents)} annotations for app {app.id}.",
fg="green",
)
)
vector.create(documents)
- click.echo(click.style(f"Successfully created vector index for app {app.id}.", fg="green"))
+ click.echo(click.style(f"Created vector index for app {app.id}.", fg="green"))
except Exception as e:
click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red"))
raise e
@@ -237,14 +237,14 @@ def migrate_annotation_vector_database():
except Exception as e:
click.echo(
click.style(
- "Create app annotation index error: {} {}".format(e.__class__.__name__, str(e)), fg="red"
+ "Error creating app annotation index: {} {}".format(e.__class__.__name__, str(e)), fg="red"
)
)
continue
click.echo(
click.style(
- f"Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.",
+ f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.",
fg="green",
)
)
@@ -254,7 +254,7 @@ def migrate_knowledge_vector_database():
"""
Migrate vector database datas to target vector database .
"""
- click.echo(click.style("Start migrate vector db.", fg="green"))
+ click.echo(click.style("Starting vector database migration.", fg="green"))
create_count = 0
skipped_count = 0
total_count = 0
@@ -278,7 +278,7 @@ def migrate_knowledge_vector_database():
f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped."
)
try:
- click.echo("Create dataset vdb index: {}".format(dataset.id))
+ click.echo("Creating dataset vector database index: {}".format(dataset.id))
if dataset.index_struct_dict:
if dataset.index_struct_dict["type"] == vector_type:
skipped_count = skipped_count + 1
@@ -299,7 +299,7 @@ def migrate_knowledge_vector_database():
if dataset_collection_binding:
collection_name = dataset_collection_binding.collection_name
else:
- raise ValueError("Dataset Collection Bindings is not exist!")
+ raise ValueError("Dataset Collection Binding not found")
else:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
@@ -351,14 +351,12 @@ def migrate_knowledge_vector_database():
raise ValueError(f"Vector store {vector_type} is not supported.")
vector = Vector(dataset)
- click.echo(f"Start to migrate dataset {dataset.id}.")
+ click.echo(f"Migrating dataset {dataset.id}.")
try:
vector.delete()
click.echo(
- click.style(
- f"Successfully delete vector index {collection_name} for dataset {dataset.id}.", fg="green"
- )
+ click.style(f"Deleted vector index {collection_name} for dataset {dataset.id}.", fg="green")
)
except Exception as e:
click.echo(
@@ -410,15 +408,13 @@ def migrate_knowledge_vector_database():
try:
click.echo(
click.style(
- f"Start to created vector index with {len(documents)} documents of {segments_count}"
+ f"Creating vector index with {len(documents)} documents of {segments_count}"
f" segments for dataset {dataset.id}.",
fg="green",
)
)
vector.create(documents)
- click.echo(
- click.style(f"Successfully created vector index for dataset {dataset.id}.", fg="green")
- )
+ click.echo(click.style(f"Created vector index for dataset {dataset.id}.", fg="green"))
except Exception as e:
click.echo(click.style(f"Failed to created vector index for dataset {dataset.id}.", fg="red"))
raise e
@@ -429,13 +425,13 @@ def migrate_knowledge_vector_database():
except Exception as e:
db.session.rollback()
click.echo(
- click.style("Create dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
+ click.style("Error creating dataset index: {} {}".format(e.__class__.__name__, str(e)), fg="red")
)
continue
click.echo(
click.style(
- f"Congratulations! Create {create_count} dataset indexes, and skipped {skipped_count} datasets.", fg="green"
+ f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets.", fg="green"
)
)
@@ -445,7 +441,7 @@ def convert_to_agent_apps():
"""
Convert Agent Assistant to Agent App.
"""
- click.echo(click.style("Start convert to agent apps.", fg="green"))
+ click.echo(click.style("Starting convert to agent apps.", fg="green"))
proceeded_app_ids = []
@@ -496,23 +492,23 @@ def convert_to_agent_apps():
except Exception as e:
click.echo(click.style("Convert app error: {} {}".format(e.__class__.__name__, str(e)), fg="red"))
- click.echo(click.style("Congratulations! Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green"))
+ click.echo(click.style("Conversion complete. Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green"))
-@click.command("add-qdrant-doc-id-index", help="add qdrant doc_id index.")
-@click.option("--field", default="metadata.doc_id", prompt=False, help="index field , default is metadata.doc_id.")
+@click.command("add-qdrant-doc-id-index", help="Add Qdrant doc_id index.")
+@click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.")
def add_qdrant_doc_id_index(field: str):
- click.echo(click.style("Start add qdrant doc_id index.", fg="green"))
+ click.echo(click.style("Starting Qdrant doc_id index creation.", fg="green"))
vector_type = dify_config.VECTOR_STORE
if vector_type != "qdrant":
- click.echo(click.style("Sorry, only support qdrant vector store.", fg="red"))
+ click.echo(click.style("This command only supports Qdrant vector store.", fg="red"))
return
create_count = 0
try:
bindings = db.session.query(DatasetCollectionBinding).all()
if not bindings:
- click.echo(click.style("Sorry, no dataset collection bindings found.", fg="red"))
+ click.echo(click.style("No dataset collection bindings found.", fg="red"))
return
import qdrant_client
from qdrant_client.http.exceptions import UnexpectedResponse
@@ -522,7 +518,7 @@ def add_qdrant_doc_id_index(field: str):
for binding in bindings:
if dify_config.QDRANT_URL is None:
- raise ValueError("Qdrant url is required.")
+ raise ValueError("Qdrant URL is required.")
qdrant_config = QdrantConfig(
endpoint=dify_config.QDRANT_URL,
api_key=dify_config.QDRANT_API_KEY,
@@ -539,41 +535,39 @@ def add_qdrant_doc_id_index(field: str):
except UnexpectedResponse as e:
# Collection does not exist, so return
if e.status_code == 404:
- click.echo(
- click.style(f"Collection not found, collection_name:{binding.collection_name}.", fg="red")
- )
+ click.echo(click.style(f"Collection not found: {binding.collection_name}.", fg="red"))
continue
# Some other error occurred, so re-raise the exception
else:
click.echo(
click.style(
- f"Failed to create qdrant index, collection_name:{binding.collection_name}.", fg="red"
+ f"Failed to create Qdrant index for collection: {binding.collection_name}.", fg="red"
)
)
except Exception as e:
- click.echo(click.style("Failed to create qdrant client.", fg="red"))
+ click.echo(click.style("Failed to create Qdrant client.", fg="red"))
- click.echo(click.style(f"Congratulations! Create {create_count} collection indexes.", fg="green"))
+ click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green"))
@click.command("create-tenant", help="Create account and tenant.")
-@click.option("--email", prompt=True, help="The email address of the tenant account.")
-@click.option("--name", prompt=True, help="The workspace name of the tenant account.")
+@click.option("--email", prompt=True, help="Tenant account email.")
+@click.option("--name", prompt=True, help="Workspace name.")
@click.option("--language", prompt=True, help="Account language, default: en-US.")
def create_tenant(email: str, language: Optional[str] = None, name: Optional[str] = None):
"""
Create tenant account
"""
if not email:
- click.echo(click.style("Sorry, email is required.", fg="red"))
+ click.echo(click.style("Email is required.", fg="red"))
return
# Create account
email = email.strip()
if "@" not in email:
- click.echo(click.style("Sorry, invalid email address.", fg="red"))
+ click.echo(click.style("Invalid email address.", fg="red"))
return
account_name = email.split("@")[0]
@@ -593,19 +587,19 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str
click.echo(
click.style(
- "Congratulations! Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password),
+ "Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password),
fg="green",
)
)
-@click.command("upgrade-db", help="upgrade the database")
+@click.command("upgrade-db", help="Upgrade the database")
def upgrade_db():
click.echo("Preparing database migration...")
lock = redis_client.lock(name="db_upgrade_lock", timeout=60)
if lock.acquire(blocking=False):
try:
- click.echo(click.style("Start database migration.", fg="green"))
+ click.echo(click.style("Starting database migration.", fg="green"))
# run db migration
import flask_migrate
@@ -615,7 +609,7 @@ def upgrade_db():
click.echo(click.style("Database migration successful!", fg="green"))
except Exception as e:
- logging.exception(f"Database migration failed, error: {e}")
+ logging.exception(f"Database migration failed: {e}")
finally:
lock.release()
else:
@@ -627,7 +621,7 @@ def fix_app_site_missing():
"""
Fix app related site missing issue.
"""
- click.echo(click.style("Start fix app related site missing issue.", fg="green"))
+ click.echo(click.style("Starting fix for missing app-related sites.", fg="green"))
failed_app_ids = []
while True:
@@ -650,22 +644,22 @@ where sites.id is null limit 1000"""
if tenant:
accounts = tenant.get_accounts()
if not accounts:
- print("Fix app {} failed.".format(app.id))
+ print("Fix failed for app {}".format(app.id))
continue
account = accounts[0]
- print("Fix app {} related site missing issue.".format(app.id))
+ print("Fixing missing site for app {}".format(app.id))
app_was_created.send(app, account=account)
except Exception as e:
failed_app_ids.append(app_id)
- click.echo(click.style("Fix app {} related site missing issue failed!".format(app_id), fg="red"))
+ click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
logging.exception(f"Fix app related site missing issue failed, error: {e}")
continue
if not processed_count:
break
- click.echo(click.style("Congratulations! Fix app related site missing issue successful!", fg="green"))
+ click.echo(click.style("Fix for missing app-related sites completed successfully!", fg="green"))
def register_commands(app):
diff --git a/api/configs/deploy/__init__.py b/api/configs/deploy/__init__.py
index 10271483c4..66d6a55b4c 100644
--- a/api/configs/deploy/__init__.py
+++ b/api/configs/deploy/__init__.py
@@ -4,30 +4,30 @@ from pydantic_settings import BaseSettings
class DeploymentConfig(BaseSettings):
"""
- Deployment configs
+ Configuration settings for application deployment
"""
APPLICATION_NAME: str = Field(
- description="application name",
+ description="Name of the application, used for identification and logging purposes",
default="langgenius/dify",
)
DEBUG: bool = Field(
- description="whether to enable debug mode.",
+ description="Enable debug mode for additional logging and development features",
default=False,
)
TESTING: bool = Field(
- description="",
+ description="Enable testing mode for running automated tests",
default=False,
)
EDITION: str = Field(
- description="deployment edition",
+ description="Deployment edition of the application (e.g., 'SELF_HOSTED', 'CLOUD')",
default="SELF_HOSTED",
)
DEPLOY_ENV: str = Field(
- description="deployment environment, default to PRODUCTION.",
+ description="Deployment environment (e.g., 'PRODUCTION', 'DEVELOPMENT'), default to PRODUCTION",
default="PRODUCTION",
)
diff --git a/api/configs/enterprise/__init__.py b/api/configs/enterprise/__init__.py
index c661593a44..eda6345e14 100644
--- a/api/configs/enterprise/__init__.py
+++ b/api/configs/enterprise/__init__.py
@@ -4,17 +4,17 @@ from pydantic_settings import BaseSettings
class EnterpriseFeatureConfig(BaseSettings):
"""
- Enterprise feature configs.
+ Configuration for enterprise-level features.
**Before using, please contact business@dify.ai by email to inquire about licensing matters.**
"""
ENTERPRISE_ENABLED: bool = Field(
- description="whether to enable enterprise features."
+ description="Enable or disable enterprise-level features."
"Before using, please contact business@dify.ai by email to inquire about licensing matters.",
default=False,
)
CAN_REPLACE_LOGO: bool = Field(
- description="whether to allow replacing enterprise logo.",
+ description="Allow customization of the enterprise logo.",
default=False,
)
diff --git a/api/configs/extra/notion_config.py b/api/configs/extra/notion_config.py
index bd1268fa45..f9c4d73463 100644
--- a/api/configs/extra/notion_config.py
+++ b/api/configs/extra/notion_config.py
@@ -6,30 +6,31 @@ from pydantic_settings import BaseSettings
class NotionConfig(BaseSettings):
"""
- Notion integration configs
+ Configuration settings for Notion integration
"""
NOTION_CLIENT_ID: Optional[str] = Field(
- description="Notion client ID",
+ description="Client ID for Notion API authentication. Required for OAuth 2.0 flow.",
default=None,
)
NOTION_CLIENT_SECRET: Optional[str] = Field(
- description="Notion client secret key",
+ description="Client secret for Notion API authentication. Required for OAuth 2.0 flow.",
default=None,
)
NOTION_INTEGRATION_TYPE: Optional[str] = Field(
- description="Notion integration type, default to None, available values: internal.",
+ description="Type of Notion integration."
+ " Set to 'internal' for internal integrations, or None for public integrations.",
default=None,
)
NOTION_INTERNAL_SECRET: Optional[str] = Field(
- description="Notion internal secret key",
+ description="Secret key for internal Notion integrations. Required when NOTION_INTEGRATION_TYPE is 'internal'.",
default=None,
)
NOTION_INTEGRATION_TOKEN: Optional[str] = Field(
- description="Notion integration token",
+ description="Integration token for Notion API access. Used for direct API calls without OAuth flow.",
default=None,
)
diff --git a/api/configs/extra/sentry_config.py b/api/configs/extra/sentry_config.py
index ea9ea60ffb..f76a6bdb95 100644
--- a/api/configs/extra/sentry_config.py
+++ b/api/configs/extra/sentry_config.py
@@ -6,20 +6,23 @@ from pydantic_settings import BaseSettings
class SentryConfig(BaseSettings):
"""
- Sentry configs
+ Configuration settings for Sentry error tracking and performance monitoring
"""
SENTRY_DSN: Optional[str] = Field(
- description="Sentry DSN",
+ description="Sentry Data Source Name (DSN)."
+ " This is the unique identifier of your Sentry project, used to send events to the correct project.",
default=None,
)
SENTRY_TRACES_SAMPLE_RATE: NonNegativeFloat = Field(
- description="Sentry trace sample rate",
+ description="Sample rate for Sentry performance monitoring traces."
+ " Value between 0.0 and 1.0, where 1.0 means 100% of traces are sent to Sentry.",
default=1.0,
)
SENTRY_PROFILES_SAMPLE_RATE: NonNegativeFloat = Field(
- description="Sentry profiles sample rate",
+ description="Sample rate for Sentry profiling."
+ " Value between 0.0 and 1.0, where 1.0 means 100% of profiles are sent to Sentry.",
default=1.0,
)
diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py
index f794552c36..9218d529cc 100644
--- a/api/configs/feature/__init__.py
+++ b/api/configs/feature/__init__.py
@@ -8,145 +8,143 @@ from configs.feature.hosted_service import HostedServiceConfig
class SecurityConfig(BaseSettings):
"""
- Secret Key configs
+ Security-related configurations for the application
"""
SECRET_KEY: Optional[str] = Field(
- description="Your App secret key will be used for securely signing the session cookie"
+ description="Secret key for secure session cookie signing."
"Make sure you are changing this key for your deployment with a strong key."
- "You can generate a strong key using `openssl rand -base64 42`."
- "Alternatively you can set it with `SECRET_KEY` environment variable.",
+ "Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.",
default=None,
)
RESET_PASSWORD_TOKEN_EXPIRY_HOURS: PositiveInt = Field(
- description="Expiry time in hours for reset token",
+ description="Duration in hours for which a password reset token remains valid",
default=24,
)
class AppExecutionConfig(BaseSettings):
"""
- App Execution configs
+ Configuration parameters for application execution
"""
APP_MAX_EXECUTION_TIME: PositiveInt = Field(
- description="execution timeout in seconds for app execution",
+ description="Maximum allowed execution time for the application in seconds",
default=1200,
)
APP_MAX_ACTIVE_REQUESTS: NonNegativeInt = Field(
- description="max active request per app, 0 means unlimited",
+ description="Maximum number of concurrent active requests per app (0 for unlimited)",
default=0,
)
class CodeExecutionSandboxConfig(BaseSettings):
"""
- Code Execution Sandbox configs
+ Configuration for the code execution sandbox environment
"""
CODE_EXECUTION_ENDPOINT: HttpUrl = Field(
- description="endpoint URL of code execution service",
+ description="URL endpoint for the code execution service",
default="http://sandbox:8194",
)
CODE_EXECUTION_API_KEY: str = Field(
- description="API key for code execution service",
+ description="API key for accessing the code execution service",
default="dify-sandbox",
)
CODE_EXECUTION_CONNECT_TIMEOUT: Optional[float] = Field(
- description="connect timeout in seconds for code execution request",
+ description="Connection timeout in seconds for code execution requests",
default=10.0,
)
CODE_EXECUTION_READ_TIMEOUT: Optional[float] = Field(
- description="read timeout in seconds for code execution request",
+ description="Read timeout in seconds for code execution requests",
default=60.0,
)
CODE_EXECUTION_WRITE_TIMEOUT: Optional[float] = Field(
- description="write timeout in seconds for code execution request",
+ description="Write timeout in seconds for code execution request",
default=10.0,
)
CODE_MAX_NUMBER: PositiveInt = Field(
- description="max depth for code execution",
+ description="Maximum allowed numeric value in code execution",
default=9223372036854775807,
)
CODE_MIN_NUMBER: NegativeInt = Field(
- description="",
+ description="Minimum allowed numeric value in code execution",
default=-9223372036854775807,
)
CODE_MAX_DEPTH: PositiveInt = Field(
- description="max depth for code execution",
+ description="Maximum allowed depth for nested structures in code execution",
default=5,
)
CODE_MAX_PRECISION: PositiveInt = Field(
- description="max precision digits for float type in code execution",
+ description="mMaximum number of decimal places for floating-point numbers in code execution",
default=20,
)
CODE_MAX_STRING_LENGTH: PositiveInt = Field(
- description="max string length for code execution",
+ description="Maximum allowed length for strings in code execution",
default=80000,
)
CODE_MAX_STRING_ARRAY_LENGTH: PositiveInt = Field(
- description="",
+ description="Maximum allowed length for string arrays in code execution",
default=30,
)
CODE_MAX_OBJECT_ARRAY_LENGTH: PositiveInt = Field(
- description="",
+ description="Maximum allowed length for object arrays in code execution",
default=30,
)
CODE_MAX_NUMBER_ARRAY_LENGTH: PositiveInt = Field(
- description="",
+ description="Maximum allowed length for numeric arrays in code execution",
default=1000,
)
class EndpointConfig(BaseSettings):
"""
- Module URL configs
+ Configuration for various application endpoints and URLs
"""
CONSOLE_API_URL: str = Field(
- description="The backend URL prefix of the console API."
- "used to concatenate the login authorization callback or notion integration callback.",
+ description="Base URL for the console API,"
+ "used for login authentication callback or notion integration callbacks",
default="",
)
CONSOLE_WEB_URL: str = Field(
- description="The front-end URL prefix of the console web."
- "used to concatenate some front-end addresses and for CORS configuration use.",
+ description="Base URL for the console web interface," "used for frontend references and CORS configuration",
default="",
)
SERVICE_API_URL: str = Field(
- description="Service API Url prefix. used to display Service API Base Url to the front-end.",
+ description="Base URL for the service API, displayed to users for API access",
default="",
)
APP_WEB_URL: str = Field(
- description="WebApp Url prefix. used to display WebAPP API Base Url to the front-end.",
+ description="Base URL for the web application, used for frontend references",
default="",
)
class FileAccessConfig(BaseSettings):
"""
- File Access configs
+ Configuration for file access and handling
"""
FILES_URL: str = Field(
- description="File preview or download Url prefix."
- " used to display File preview or download Url to the front-end or as Multi-model inputs;"
+ description="Base URL for file preview or download,"
+ " used for frontend display and multi-model inputs"
"Url is signed and has expiration time.",
validation_alias=AliasChoices("FILES_URL", "CONSOLE_API_URL"),
alias_priority=1,
@@ -154,49 +152,49 @@ class FileAccessConfig(BaseSettings):
)
FILES_ACCESS_TIMEOUT: int = Field(
- description="timeout in seconds for file accessing",
+ description="Expiration time in seconds for file access URLs",
default=300,
)
class FileUploadConfig(BaseSettings):
"""
- File Uploading configs
+ Configuration for file upload limitations
"""
UPLOAD_FILE_SIZE_LIMIT: NonNegativeInt = Field(
- description="size limit in Megabytes for uploading files",
+ description="Maximum allowed file size for uploads in megabytes",
default=15,
)
UPLOAD_FILE_BATCH_LIMIT: NonNegativeInt = Field(
- description="batch size limit for uploading files",
+ description="Maximum number of files allowed in a single upload batch",
default=5,
)
UPLOAD_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(
- description="image file size limit in Megabytes for uploading files",
+ description="Maximum allowed image file size for uploads in megabytes",
default=10,
)
BATCH_UPLOAD_LIMIT: NonNegativeInt = Field(
- description="", # todo: to be clarified
+ description="Maximum number of files allowed in a batch upload operation",
default=20,
)
class HttpConfig(BaseSettings):
"""
- HTTP configs
+ HTTP-related configurations for the application
"""
API_COMPRESSION_ENABLED: bool = Field(
- description="whether to enable HTTP response compression of gzip",
+ description="Enable or disable gzip compression for HTTP responses",
default=False,
)
inner_CONSOLE_CORS_ALLOW_ORIGINS: str = Field(
- description="",
+ description="Comma-separated list of allowed origins for CORS in the console",
validation_alias=AliasChoices("CONSOLE_CORS_ALLOW_ORIGINS", "CONSOLE_WEB_URL"),
default="",
)
@@ -218,359 +216,360 @@ class HttpConfig(BaseSettings):
return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",")
HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[
- PositiveInt, Field(ge=10, description="connect timeout in seconds for HTTP request")
+ PositiveInt, Field(ge=10, description="Maximum connection timeout in seconds for HTTP requests")
] = 10
HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[
- PositiveInt, Field(ge=60, description="read timeout in seconds for HTTP request")
+ PositiveInt, Field(ge=60, description="Maximum read timeout in seconds for HTTP requests")
] = 60
HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[
- PositiveInt, Field(ge=10, description="read timeout in seconds for HTTP request")
+ PositiveInt, Field(ge=10, description="Maximum write timeout in seconds for HTTP requests")
] = 20
HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
- description="",
+ description="Maximum allowed size in bytes for binary data in HTTP requests",
default=10 * 1024 * 1024,
)
HTTP_REQUEST_NODE_MAX_TEXT_SIZE: PositiveInt = Field(
- description="",
+ description="Maximum allowed size in bytes for text data in HTTP requests",
default=1 * 1024 * 1024,
)
SSRF_PROXY_HTTP_URL: Optional[str] = Field(
- description="HTTP URL for SSRF proxy",
+ description="Proxy URL for HTTP requests to prevent Server-Side Request Forgery (SSRF)",
default=None,
)
SSRF_PROXY_HTTPS_URL: Optional[str] = Field(
- description="HTTPS URL for SSRF proxy",
+ description="Proxy URL for HTTPS requests to prevent Server-Side Request Forgery (SSRF)",
default=None,
)
class InnerAPIConfig(BaseSettings):
"""
- Inner API configs
+ Configuration for internal API functionality
"""
INNER_API: bool = Field(
- description="whether to enable the inner API",
+ description="Enable or disable the internal API",
default=False,
)
INNER_API_KEY: Optional[str] = Field(
- description="The inner API key is used to authenticate the inner API",
+ description="API key for accessing the internal API",
default=None,
)
class LoggingConfig(BaseSettings):
"""
- Logging configs
+ Configuration for application logging
"""
LOG_LEVEL: str = Field(
- description="Log output level, default to INFO. It is recommended to set it to ERROR for production.",
+ description="Logging level, default to INFO. Set to ERROR for production environments.",
default="INFO",
)
LOG_FILE: Optional[str] = Field(
- description="logging output file path",
+ description="File path for log output.",
default=None,
)
LOG_FORMAT: str = Field(
- description="log format",
+ description="Format string for log messages",
default="%(asctime)s.%(msecs)03d %(levelname)s [%(threadName)s] [%(filename)s:%(lineno)d] - %(message)s",
)
LOG_DATEFORMAT: Optional[str] = Field(
- description="log date format",
+ description="Date format string for log timestamps",
default=None,
)
LOG_TZ: Optional[str] = Field(
- description="specify log timezone, eg: America/New_York",
+ description="Timezone for log timestamps (e.g., 'America/New_York')",
default=None,
)
class ModelLoadBalanceConfig(BaseSettings):
"""
- Model load balance configs
+ Configuration for model load balancing
"""
MODEL_LB_ENABLED: bool = Field(
- description="whether to enable model load balancing",
+ description="Enable or disable load balancing for models",
default=False,
)
class BillingConfig(BaseSettings):
"""
- Platform Billing Configurations
+ Configuration for platform billing features
"""
BILLING_ENABLED: bool = Field(
- description="whether to enable billing",
+ description="Enable or disable billing functionality",
default=False,
)
class UpdateConfig(BaseSettings):
"""
- Update configs
+ Configuration for application update checks
"""
CHECK_UPDATE_URL: str = Field(
- description="url for checking updates",
+ description="URL to check for application updates",
default="https://updates.dify.ai",
)
class WorkflowConfig(BaseSettings):
"""
- Workflow feature configs
+ Configuration for workflow execution
"""
WORKFLOW_MAX_EXECUTION_STEPS: PositiveInt = Field(
- description="max execution steps in single workflow execution",
+ description="Maximum number of steps allowed in a single workflow execution",
default=500,
)
WORKFLOW_MAX_EXECUTION_TIME: PositiveInt = Field(
- description="max execution time in seconds in single workflow execution",
+ description="Maximum execution time in seconds for a single workflow",
default=1200,
)
WORKFLOW_CALL_MAX_DEPTH: PositiveInt = Field(
- description="max depth of calling in single workflow execution",
+ description="Maximum allowed depth for nested workflow calls",
default=5,
)
MAX_VARIABLE_SIZE: PositiveInt = Field(
- description="The maximum size in bytes of a variable. default to 5KB.",
+ description="Maximum size in bytes for a single variable in workflows. Default to 5KB.",
default=5 * 1024,
)
class OAuthConfig(BaseSettings):
"""
- oauth configs
+ Configuration for OAuth authentication
"""
OAUTH_REDIRECT_PATH: str = Field(
- description="redirect path for OAuth",
+ description="Redirect path for OAuth authentication callbacks",
default="/console/api/oauth/authorize",
)
GITHUB_CLIENT_ID: Optional[str] = Field(
- description="GitHub client id for OAuth",
+ description="GitHub OAuth client secret",
default=None,
)
GITHUB_CLIENT_SECRET: Optional[str] = Field(
- description="GitHub client secret key for OAuth",
+ description="GitHub OAuth client secret",
default=None,
)
GOOGLE_CLIENT_ID: Optional[str] = Field(
- description="Google client id for OAuth",
+ description="Google OAuth client ID",
default=None,
)
GOOGLE_CLIENT_SECRET: Optional[str] = Field(
- description="Google client secret key for OAuth",
+ description="Google OAuth client secret",
default=None,
)
class ModerationConfig(BaseSettings):
"""
- Moderation in app configs.
+ Configuration for content moderation
"""
MODERATION_BUFFER_SIZE: PositiveInt = Field(
- description="buffer size for moderation",
+ description="Size of the buffer for content moderation processing",
default=300,
)
class ToolConfig(BaseSettings):
"""
- Tool configs
+ Configuration for tool management
"""
TOOL_ICON_CACHE_MAX_AGE: PositiveInt = Field(
- description="max age in seconds for tool icon caching",
+ description="Maximum age in seconds for caching tool icons",
default=3600,
)
class MailConfig(BaseSettings):
"""
- Mail Configurations
+ Configuration for email services
"""
MAIL_TYPE: Optional[str] = Field(
- description="Mail provider type name, default to None, available values are `smtp` and `resend`.",
+ description="Email service provider type ('smtp' or 'resend'), default to None.",
default=None,
)
MAIL_DEFAULT_SEND_FROM: Optional[str] = Field(
- description="default email address for sending from ",
+ description="Default email address to use as the sender",
default=None,
)
RESEND_API_KEY: Optional[str] = Field(
- description="API key for Resend",
+ description="API key for Resend email service",
default=None,
)
RESEND_API_URL: Optional[str] = Field(
- description="API URL for Resend",
+ description="API URL for Resend email service",
default=None,
)
SMTP_SERVER: Optional[str] = Field(
- description="smtp server host",
+ description="SMTP server hostname",
default=None,
)
SMTP_PORT: Optional[int] = Field(
- description="smtp server port",
+ description="SMTP server port number",
default=465,
)
SMTP_USERNAME: Optional[str] = Field(
- description="smtp server username",
+ description="Username for SMTP authentication",
default=None,
)
SMTP_PASSWORD: Optional[str] = Field(
- description="smtp server password",
+ description="Password for SMTP authentication",
default=None,
)
SMTP_USE_TLS: bool = Field(
- description="whether to use TLS connection to smtp server",
+ description="Enable TLS encryption for SMTP connections",
default=False,
)
SMTP_OPPORTUNISTIC_TLS: bool = Field(
- description="whether to use opportunistic TLS connection to smtp server",
+ description="Enable opportunistic TLS for SMTP connections",
default=False,
)
class RagEtlConfig(BaseSettings):
"""
- RAG ETL Configurations.
+ Configuration for RAG ETL processes
"""
ETL_TYPE: str = Field(
- description="RAG ETL type name, default to `dify`, available values are `dify` and `Unstructured`. ",
+ description="RAG ETL type ('dify' or 'Unstructured'), default to 'dify'",
default="dify",
)
KEYWORD_DATA_SOURCE_TYPE: str = Field(
- description="source type for keyword data, default to `database`, available values are `database` .",
+ description="Data source type for keyword extraction"
+ " ('database' or other supported types), default to 'database'",
default="database",
)
UNSTRUCTURED_API_URL: Optional[str] = Field(
- description="API URL for Unstructured",
+ description="API URL for Unstructured.io service",
default=None,
)
UNSTRUCTURED_API_KEY: Optional[str] = Field(
- description="API key for Unstructured",
+ description="API key for Unstructured.io service",
default=None,
)
class DataSetConfig(BaseSettings):
"""
- Dataset configs
+ Configuration for dataset management
"""
CLEAN_DAY_SETTING: PositiveInt = Field(
- description="interval in days for cleaning up dataset",
+ description="Interval in days for dataset cleanup operations",
default=30,
)
DATASET_OPERATOR_ENABLED: bool = Field(
- description="whether to enable dataset operator",
+ description="Enable or disable dataset operator functionality",
default=False,
)
class WorkspaceConfig(BaseSettings):
"""
- Workspace configs
+ Configuration for workspace management
"""
INVITE_EXPIRY_HOURS: PositiveInt = Field(
- description="workspaces invitation expiration in hours",
+ description="Expiration time in hours for workspace invitation links",
default=72,
)
class IndexingConfig(BaseSettings):
"""
- Indexing configs.
+ Configuration for indexing operations
"""
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: PositiveInt = Field(
- description="max segmentation token length for indexing",
+ description="Maximum token length for text segmentation during indexing",
default=1000,
)
class ImageFormatConfig(BaseSettings):
MULTIMODAL_SEND_IMAGE_FORMAT: str = Field(
- description="multi model send image format, support base64, url, default is base64",
+ description="Format for sending images in multimodal contexts ('base64' or 'url'), default is base64",
default="base64",
)
class CeleryBeatConfig(BaseSettings):
CELERY_BEAT_SCHEDULER_TIME: int = Field(
- description="the time of the celery scheduler, default to 1 day",
+ description="Interval in days for Celery Beat scheduler execution, default to 1 day",
default=1,
)
class PositionConfig(BaseSettings):
POSITION_PROVIDER_PINS: str = Field(
- description="The heads of model providers",
+ description="Comma-separated list of pinned model providers",
default="",
)
POSITION_PROVIDER_INCLUDES: str = Field(
- description="The included model providers",
+ description="Comma-separated list of included model providers",
default="",
)
POSITION_PROVIDER_EXCLUDES: str = Field(
- description="The excluded model providers",
+ description="Comma-separated list of excluded model providers",
default="",
)
POSITION_TOOL_PINS: str = Field(
- description="The heads of tools",
+ description="Comma-separated list of pinned tools",
default="",
)
POSITION_TOOL_INCLUDES: str = Field(
- description="The included tools",
+ description="Comma-separated list of included tools",
default="",
)
POSITION_TOOL_EXCLUDES: str = Field(
- description="The excluded tools",
+ description="Comma-separated list of excluded tools",
default="",
)
diff --git a/api/configs/feature/hosted_service/__init__.py b/api/configs/feature/hosted_service/__init__.py
index f269d0ab9c..7f103be8f4 100644
--- a/api/configs/feature/hosted_service/__init__.py
+++ b/api/configs/feature/hosted_service/__init__.py
@@ -6,31 +6,31 @@ from pydantic_settings import BaseSettings
class HostedOpenAiConfig(BaseSettings):
"""
- Hosted OpenAI service config
+ Configuration for hosted OpenAI service
"""
HOSTED_OPENAI_API_KEY: Optional[str] = Field(
- description="",
+ description="API key for hosted OpenAI service",
default=None,
)
HOSTED_OPENAI_API_BASE: Optional[str] = Field(
- description="",
+ description="Base URL for hosted OpenAI API",
default=None,
)
HOSTED_OPENAI_API_ORGANIZATION: Optional[str] = Field(
- description="",
+ description="Organization ID for hosted OpenAI service",
default=None,
)
HOSTED_OPENAI_TRIAL_ENABLED: bool = Field(
- description="",
+ description="Enable trial access to hosted OpenAI service",
default=False,
)
HOSTED_OPENAI_TRIAL_MODELS: str = Field(
- description="",
+ description="Comma-separated list of available models for trial access",
default="gpt-3.5-turbo,"
"gpt-3.5-turbo-1106,"
"gpt-3.5-turbo-instruct,"
@@ -42,17 +42,17 @@ class HostedOpenAiConfig(BaseSettings):
)
HOSTED_OPENAI_QUOTA_LIMIT: NonNegativeInt = Field(
- description="",
+ description="Quota limit for hosted OpenAI service usage",
default=200,
)
HOSTED_OPENAI_PAID_ENABLED: bool = Field(
- description="",
+ description="Enable paid access to hosted OpenAI service",
default=False,
)
HOSTED_OPENAI_PAID_MODELS: str = Field(
- description="",
+ description="Comma-separated list of available models for paid access",
default="gpt-4,"
"gpt-4-turbo-preview,"
"gpt-4-turbo-2024-04-09,"
@@ -71,124 +71,122 @@ class HostedOpenAiConfig(BaseSettings):
class HostedAzureOpenAiConfig(BaseSettings):
"""
- Hosted OpenAI service config
+ Configuration for hosted Azure OpenAI service
"""
HOSTED_AZURE_OPENAI_ENABLED: bool = Field(
- description="",
+ description="Enable hosted Azure OpenAI service",
default=False,
)
HOSTED_AZURE_OPENAI_API_KEY: Optional[str] = Field(
- description="",
+ description="API key for hosted Azure OpenAI service",
default=None,
)
HOSTED_AZURE_OPENAI_API_BASE: Optional[str] = Field(
- description="",
+ description="Base URL for hosted Azure OpenAI API",
default=None,
)
HOSTED_AZURE_OPENAI_QUOTA_LIMIT: NonNegativeInt = Field(
- description="",
+ description="Quota limit for hosted Azure OpenAI service usage",
default=200,
)
class HostedAnthropicConfig(BaseSettings):
"""
- Hosted Azure OpenAI service config
+ Configuration for hosted Anthropic service
"""
HOSTED_ANTHROPIC_API_BASE: Optional[str] = Field(
- description="",
+ description="Base URL for hosted Anthropic API",
default=None,
)
HOSTED_ANTHROPIC_API_KEY: Optional[str] = Field(
- description="",
+ description="API key for hosted Anthropic service",
default=None,
)
HOSTED_ANTHROPIC_TRIAL_ENABLED: bool = Field(
- description="",
+ description="Enable trial access to hosted Anthropic service",
default=False,
)
HOSTED_ANTHROPIC_QUOTA_LIMIT: NonNegativeInt = Field(
- description="",
+ description="Quota limit for hosted Anthropic service usage",
default=600000,
)
HOSTED_ANTHROPIC_PAID_ENABLED: bool = Field(
- description="",
+ description="Enable paid access to hosted Anthropic service",
default=False,
)
class HostedMinmaxConfig(BaseSettings):
"""
- Hosted Minmax service config
+ Configuration for hosted Minmax service
"""
HOSTED_MINIMAX_ENABLED: bool = Field(
- description="",
+ description="Enable hosted Minmax service",
default=False,
)
class HostedSparkConfig(BaseSettings):
"""
- Hosted Spark service config
+ Configuration for hosted Spark service
"""
HOSTED_SPARK_ENABLED: bool = Field(
- description="",
+ description="Enable hosted Spark service",
default=False,
)
class HostedZhipuAIConfig(BaseSettings):
"""
- Hosted Minmax service config
+ Configuration for hosted ZhipuAI service
"""
HOSTED_ZHIPUAI_ENABLED: bool = Field(
- description="",
+ description="Enable hosted ZhipuAI service",
default=False,
)
class HostedModerationConfig(BaseSettings):
"""
- Hosted Moderation service config
+ Configuration for hosted Moderation service
"""
HOSTED_MODERATION_ENABLED: bool = Field(
- description="",
+ description="Enable hosted Moderation service",
default=False,
)
HOSTED_MODERATION_PROVIDERS: str = Field(
- description="",
+ description="Comma-separated list of moderation providers",
default="",
)
class HostedFetchAppTemplateConfig(BaseSettings):
"""
- Hosted Moderation service config
+ Configuration for fetching app templates
"""
HOSTED_FETCH_APP_TEMPLATES_MODE: str = Field(
- description="the mode for fetching app templates,"
- " default to remote,"
- " available values: remote, db, builtin",
+ description="Mode for fetching app templates: remote, db, or builtin" " default to remote,",
default="remote",
)
HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN: str = Field(
- description="the domain for fetching remote app templates",
+ description="Domain for fetching remote app templates",
default="https://tmpl.dify.ai",
)
diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py
index e017c2c5b8..6ad216c191 100644
--- a/api/configs/middleware/__init__.py
+++ b/api/configs/middleware/__init__.py
@@ -31,70 +31,71 @@ from configs.middleware.vdb.weaviate_config import WeaviateConfig
class StorageConfig(BaseSettings):
STORAGE_TYPE: str = Field(
- description="storage type,"
- " default to `local`,"
- " available values are `local`, `s3`, `azure-blob`, `aliyun-oss`, `google-storage`.",
+ description="Type of storage to use."
+ " Options: 'local', 's3', 'azure-blob', 'aliyun-oss', 'google-storage'. Default is 'local'.",
default="local",
)
STORAGE_LOCAL_PATH: str = Field(
- description="local storage path",
+ description="Path for local storage when STORAGE_TYPE is set to 'local'.",
default="storage",
)
class VectorStoreConfig(BaseSettings):
VECTOR_STORE: Optional[str] = Field(
- description="vector store type",
+ description="Type of vector store to use for efficient similarity search."
+ " Set to None if not using a vector store.",
default=None,
)
class KeywordStoreConfig(BaseSettings):
KEYWORD_STORE: str = Field(
- description="keyword store type",
+ description="Method for keyword extraction and storage."
+ " Default is 'jieba', a Chinese text segmentation library.",
default="jieba",
)
class DatabaseConfig:
DB_HOST: str = Field(
- description="db host",
+ description="Hostname or IP address of the database server.",
default="localhost",
)
DB_PORT: PositiveInt = Field(
- description="db port",
+ description="Port number for database connection.",
default=5432,
)
DB_USERNAME: str = Field(
- description="db username",
+ description="Username for database authentication.",
default="postgres",
)
DB_PASSWORD: str = Field(
- description="db password",
+ description="Password for database authentication.",
default="",
)
DB_DATABASE: str = Field(
- description="db database",
+ description="Name of the database to connect to.",
default="dify",
)
DB_CHARSET: str = Field(
- description="db charset",
+ description="Character set for database connection.",
default="",
)
DB_EXTRAS: str = Field(
- description="db extras options. Example: keepalives_idle=60&keepalives=1",
+ description="Additional database connection parameters. Example: 'keepalives_idle=60&keepalives=1'",
default="",
)
SQLALCHEMY_DATABASE_URI_SCHEME: str = Field(
- description="db uri scheme",
+ description="Database URI scheme for SQLAlchemy connection.",
default="postgresql",
)
@@ -112,27 +113,27 @@ class DatabaseConfig:
)
SQLALCHEMY_POOL_SIZE: NonNegativeInt = Field(
- description="pool size of SqlAlchemy",
+ description="Maximum number of database connections in the pool.",
default=30,
)
SQLALCHEMY_MAX_OVERFLOW: NonNegativeInt = Field(
- description="max overflows for SqlAlchemy",
+ description="Maximum number of connections that can be created beyond the pool_size.",
default=10,
)
SQLALCHEMY_POOL_RECYCLE: NonNegativeInt = Field(
- description="SqlAlchemy pool recycle",
+ description="Number of seconds after which a connection is automatically recycled.",
default=3600,
)
SQLALCHEMY_POOL_PRE_PING: bool = Field(
- description="whether to enable pool pre-ping in SqlAlchemy",
+ description="If True, enables connection pool pre-ping feature to check connections.",
default=False,
)
SQLALCHEMY_ECHO: bool | str = Field(
- description="whether to enable SqlAlchemy echo",
+ description="If True, SQLAlchemy will log all SQL statements.",
default=False,
)
@@ -150,27 +151,27 @@ class DatabaseConfig:
class CeleryConfig(DatabaseConfig):
CELERY_BACKEND: str = Field(
- description="Celery backend, available values are `database`, `redis`",
+ description="Backend for Celery task results. Options: 'database', 'redis'.",
default="database",
)
CELERY_BROKER_URL: Optional[str] = Field(
- description="CELERY_BROKER_URL",
+ description="URL of the message broker for Celery tasks.",
default=None,
)
CELERY_USE_SENTINEL: Optional[bool] = Field(
- description="Whether to use Redis Sentinel mode",
+ description="Whether to use Redis Sentinel for high availability.",
default=False,
)
CELERY_SENTINEL_MASTER_NAME: Optional[str] = Field(
- description="Redis Sentinel master name",
+ description="Name of the Redis Sentinel master.",
default=None,
)
CELERY_SENTINEL_SOCKET_TIMEOUT: Optional[PositiveFloat] = Field(
- description="Redis Sentinel socket timeout",
+ description="Timeout for Redis Sentinel socket operations in seconds.",
default=0.1,
)
diff --git a/api/configs/middleware/cache/redis_config.py b/api/configs/middleware/cache/redis_config.py
index 4fcd52ddc9..26b9b1347c 100644
--- a/api/configs/middleware/cache/redis_config.py
+++ b/api/configs/middleware/cache/redis_config.py
@@ -6,65 +6,65 @@ from pydantic_settings import BaseSettings
class RedisConfig(BaseSettings):
"""
- Redis configs
+ Configuration settings for Redis connection
"""
REDIS_HOST: str = Field(
- description="Redis host",
+ description="Hostname or IP address of the Redis server",
default="localhost",
)
REDIS_PORT: PositiveInt = Field(
- description="Redis port",
+ description="Port number on which the Redis server is listening",
default=6379,
)
REDIS_USERNAME: Optional[str] = Field(
- description="Redis username",
+ description="Username for Redis authentication (if required)",
default=None,
)
REDIS_PASSWORD: Optional[str] = Field(
- description="Redis password",
+ description="Password for Redis authentication (if required)",
default=None,
)
REDIS_DB: NonNegativeInt = Field(
- description="Redis database id, default to 0",
+ description="Redis database number to use (0-15)",
default=0,
)
REDIS_USE_SSL: bool = Field(
- description="whether to use SSL for Redis connection",
+ description="Enable SSL/TLS for the Redis connection",
default=False,
)
REDIS_USE_SENTINEL: Optional[bool] = Field(
- description="Whether to use Redis Sentinel mode",
+ description="Enable Redis Sentinel mode for high availability",
default=False,
)
REDIS_SENTINELS: Optional[str] = Field(
- description="Redis Sentinel nodes",
+ description="Comma-separated list of Redis Sentinel nodes (host:port)",
default=None,
)
REDIS_SENTINEL_SERVICE_NAME: Optional[str] = Field(
- description="Redis Sentinel service name",
+ description="Name of the Redis Sentinel service to monitor",
default=None,
)
REDIS_SENTINEL_USERNAME: Optional[str] = Field(
- description="Redis Sentinel username",
+ description="Username for Redis Sentinel authentication (if required)",
default=None,
)
REDIS_SENTINEL_PASSWORD: Optional[str] = Field(
- description="Redis Sentinel password",
+ description="Password for Redis Sentinel authentication (if required)",
default=None,
)
REDIS_SENTINEL_SOCKET_TIMEOUT: Optional[PositiveFloat] = Field(
- description="Redis Sentinel socket timeout",
+ description="Socket timeout in seconds for Redis Sentinel connections",
default=0.1,
)
diff --git a/api/configs/middleware/storage/aliyun_oss_storage_config.py b/api/configs/middleware/storage/aliyun_oss_storage_config.py
index c1843dc26c..07eb527170 100644
--- a/api/configs/middleware/storage/aliyun_oss_storage_config.py
+++ b/api/configs/middleware/storage/aliyun_oss_storage_config.py
@@ -6,40 +6,40 @@ from pydantic_settings import BaseSettings
class AliyunOSSStorageConfig(BaseSettings):
"""
- Aliyun storage configs
+ Configuration settings for Aliyun Object Storage Service (OSS)
"""
ALIYUN_OSS_BUCKET_NAME: Optional[str] = Field(
- description="Aliyun OSS bucket name",
+ description="Name of the Aliyun OSS bucket to store and retrieve objects",
default=None,
)
ALIYUN_OSS_ACCESS_KEY: Optional[str] = Field(
- description="Aliyun OSS access key",
+ description="Access key ID for authenticating with Aliyun OSS",
default=None,
)
ALIYUN_OSS_SECRET_KEY: Optional[str] = Field(
- description="Aliyun OSS secret key",
+ description="Secret access key for authenticating with Aliyun OSS",
default=None,
)
ALIYUN_OSS_ENDPOINT: Optional[str] = Field(
- description="Aliyun OSS endpoint URL",
+ description="URL of the Aliyun OSS endpoint for your chosen region",
default=None,
)
ALIYUN_OSS_REGION: Optional[str] = Field(
- description="Aliyun OSS region",
+ description="Aliyun OSS region where your bucket is located (e.g., 'oss-cn-hangzhou')",
default=None,
)
ALIYUN_OSS_AUTH_VERSION: Optional[str] = Field(
- description="Aliyun OSS authentication version",
+ description="Version of the authentication protocol to use with Aliyun OSS (e.g., 'v4')",
default=None,
)
ALIYUN_OSS_PATH: Optional[str] = Field(
- description="Aliyun OSS path",
+ description="Base path within the bucket to store objects (e.g., 'my-app-data/')",
default=None,
)
diff --git a/api/configs/middleware/storage/amazon_s3_storage_config.py b/api/configs/middleware/storage/amazon_s3_storage_config.py
index bef9326108..f2d94b12ff 100644
--- a/api/configs/middleware/storage/amazon_s3_storage_config.py
+++ b/api/configs/middleware/storage/amazon_s3_storage_config.py
@@ -6,40 +6,40 @@ from pydantic_settings import BaseSettings
class S3StorageConfig(BaseSettings):
"""
- S3 storage configs
+ Configuration settings for S3-compatible object storage
"""
S3_ENDPOINT: Optional[str] = Field(
- description="S3 storage endpoint",
+ description="URL of the S3-compatible storage endpoint (e.g., 'https://s3.amazonaws.com')",
default=None,
)
S3_REGION: Optional[str] = Field(
- description="S3 storage region",
+ description="Region where the S3 bucket is located (e.g., 'us-east-1')",
default=None,
)
S3_BUCKET_NAME: Optional[str] = Field(
- description="S3 storage bucket name",
+ description="Name of the S3 bucket to store and retrieve objects",
default=None,
)
S3_ACCESS_KEY: Optional[str] = Field(
- description="S3 storage access key",
+ description="Access key ID for authenticating with the S3 service",
default=None,
)
S3_SECRET_KEY: Optional[str] = Field(
- description="S3 storage secret key",
+ description="Secret access key for authenticating with the S3 service",
default=None,
)
S3_ADDRESS_STYLE: str = Field(
- description="S3 storage address style",
+ description="S3 addressing style: 'auto', 'path', or 'virtual'",
default="auto",
)
S3_USE_AWS_MANAGED_IAM: bool = Field(
- description="whether to use aws managed IAM for S3",
+ description="Use AWS managed IAM roles for authentication instead of access/secret keys",
default=False,
)
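
A hedged sketch (assumed, not taken from this diff) of how these settings typically translate into a boto3 client; when S3_USE_AWS_MANAGED_IAM is enabled the explicit keys would be omitted:

```python
import boto3
from botocore.client import Config

client = boto3.client(
    "s3",
    endpoint_url="https://s3.amazonaws.com",          # S3_ENDPOINT
    region_name="us-east-1",                          # S3_REGION
    aws_access_key_id="AKIA...",                      # S3_ACCESS_KEY (placeholder)
    aws_secret_access_key="...",                      # S3_SECRET_KEY (placeholder)
    config=Config(s3={"addressing_style": "auto"}),   # S3_ADDRESS_STYLE
)
client.head_bucket(Bucket="my-bucket")                # S3_BUCKET_NAME
```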
diff --git a/api/configs/middleware/storage/azure_blob_storage_config.py b/api/configs/middleware/storage/azure_blob_storage_config.py
index 10944b58ed..b7ab5247a9 100644
--- a/api/configs/middleware/storage/azure_blob_storage_config.py
+++ b/api/configs/middleware/storage/azure_blob_storage_config.py
@@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings
class AzureBlobStorageConfig(BaseSettings):
"""
- Azure Blob storage configs
+ Configuration settings for Azure Blob Storage
"""
AZURE_BLOB_ACCOUNT_NAME: Optional[str] = Field(
- description="Azure Blob account name",
+ description="Name of the Azure Storage account (e.g., 'mystorageaccount')",
default=None,
)
AZURE_BLOB_ACCOUNT_KEY: Optional[str] = Field(
- description="Azure Blob account key",
+ description="Access key for authenticating with the Azure Storage account",
default=None,
)
AZURE_BLOB_CONTAINER_NAME: Optional[str] = Field(
- description="Azure Blob container name",
+ description="Name of the Azure Blob container to store and retrieve objects",
default=None,
)
AZURE_BLOB_ACCOUNT_URL: Optional[str] = Field(
- description="Azure Blob account URL",
+ description="URL of the Azure Blob storage endpoint (e.g., 'https://mystorageaccount.blob.core.windows.net')",
default=None,
)
diff --git a/api/configs/middleware/storage/google_cloud_storage_config.py b/api/configs/middleware/storage/google_cloud_storage_config.py
index 10a2d97e8d..e5d763d7f5 100644
--- a/api/configs/middleware/storage/google_cloud_storage_config.py
+++ b/api/configs/middleware/storage/google_cloud_storage_config.py
@@ -6,15 +6,15 @@ from pydantic_settings import BaseSettings
class GoogleCloudStorageConfig(BaseSettings):
"""
- Google Cloud storage configs
+ Configuration settings for Google Cloud Storage
"""
GOOGLE_STORAGE_BUCKET_NAME: Optional[str] = Field(
- description="Google Cloud storage bucket name",
+ description="Name of the Google Cloud Storage bucket to store and retrieve objects (e.g., 'my-gcs-bucket')",
default=None,
)
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: Optional[str] = Field(
- description="Google Cloud storage service account json base64",
+ description="Base64-encoded JSON key file for Google Cloud service account authentication",
default=None,
)
diff --git a/api/configs/middleware/storage/huawei_obs_storage_config.py b/api/configs/middleware/storage/huawei_obs_storage_config.py
index c5cb379cae..3e9e7543ab 100644
--- a/api/configs/middleware/storage/huawei_obs_storage_config.py
+++ b/api/configs/middleware/storage/huawei_obs_storage_config.py
@@ -5,25 +5,25 @@ from pydantic import BaseModel, Field
class HuaweiCloudOBSStorageConfig(BaseModel):
"""
- Huawei Cloud OBS storage configs
+ Configuration settings for Huawei Cloud Object Storage Service (OBS)
"""
HUAWEI_OBS_BUCKET_NAME: Optional[str] = Field(
- description="Huawei Cloud OBS bucket name",
+ description="Name of the Huawei Cloud OBS bucket to store and retrieve objects (e.g., 'my-obs-bucket')",
default=None,
)
HUAWEI_OBS_ACCESS_KEY: Optional[str] = Field(
- description="Huawei Cloud OBS Access key",
+ description="Access Key ID for authenticating with Huawei Cloud OBS",
default=None,
)
HUAWEI_OBS_SECRET_KEY: Optional[str] = Field(
- description="Huawei Cloud OBS Secret key",
+ description="Secret Access Key for authenticating with Huawei Cloud OBS",
default=None,
)
HUAWEI_OBS_SERVER: Optional[str] = Field(
- description="Huawei Cloud OBS server URL",
+ description="Endpoint URL for Huawei Cloud OBS (e.g., 'https://obs.cn-north-4.myhuaweicloud.com')",
default=None,
)
diff --git a/api/configs/middleware/storage/oci_storage_config.py b/api/configs/middleware/storage/oci_storage_config.py
index f8993496c9..edc245bcac 100644
--- a/api/configs/middleware/storage/oci_storage_config.py
+++ b/api/configs/middleware/storage/oci_storage_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class OCIStorageConfig(BaseSettings):
"""
- OCI storage configs
+ Configuration settings for Oracle Cloud Infrastructure (OCI) Object Storage
"""
OCI_ENDPOINT: Optional[str] = Field(
- description="OCI storage endpoint",
+ description="URL of the OCI Object Storage endpoint (e.g., 'https://objectstorage.us-phoenix-1.oraclecloud.com')",
default=None,
)
OCI_REGION: Optional[str] = Field(
- description="OCI storage region",
+ description="OCI region where the bucket is located (e.g., 'us-phoenix-1')",
default=None,
)
OCI_BUCKET_NAME: Optional[str] = Field(
- description="OCI storage bucket name",
+ description="Name of the OCI Object Storage bucket to store and retrieve objects (e.g., 'my-oci-bucket')",
default=None,
)
OCI_ACCESS_KEY: Optional[str] = Field(
- description="OCI storage access key",
+ description="Access key (also known as API key) for authenticating with OCI Object Storage",
default=None,
)
OCI_SECRET_KEY: Optional[str] = Field(
- description="OCI storage secret key",
+ description="Secret key associated with the access key for authenticating with OCI Object Storage",
default=None,
)
diff --git a/api/configs/middleware/storage/tencent_cos_storage_config.py b/api/configs/middleware/storage/tencent_cos_storage_config.py
index 765ac08f3e..255c4e8938 100644
--- a/api/configs/middleware/storage/tencent_cos_storage_config.py
+++ b/api/configs/middleware/storage/tencent_cos_storage_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class TencentCloudCOSStorageConfig(BaseSettings):
"""
- Tencent Cloud COS storage configs
+ Configuration settings for Tencent Cloud Object Storage (COS)
"""
TENCENT_COS_BUCKET_NAME: Optional[str] = Field(
- description="Tencent Cloud COS bucket name",
+ description="Name of the Tencent Cloud COS bucket to store and retrieve objects",
default=None,
)
TENCENT_COS_REGION: Optional[str] = Field(
- description="Tencent Cloud COS region",
+ description="Tencent Cloud region where the COS bucket is located (e.g., 'ap-guangzhou')",
default=None,
)
TENCENT_COS_SECRET_ID: Optional[str] = Field(
- description="Tencent Cloud COS secret id",
+ description="SecretId for authenticating with Tencent Cloud COS (part of API credentials)",
default=None,
)
TENCENT_COS_SECRET_KEY: Optional[str] = Field(
- description="Tencent Cloud COS secret key",
+ description="SecretKey for authenticating with Tencent Cloud COS (part of API credentials)",
default=None,
)
TENCENT_COS_SCHEME: Optional[str] = Field(
- description="Tencent Cloud COS scheme",
+ description="Protocol scheme for COS requests: 'https' (recommended) or 'http'",
default=None,
)
diff --git a/api/configs/middleware/storage/volcengine_tos_storage_config.py b/api/configs/middleware/storage/volcengine_tos_storage_config.py
index a0e09a3cc7..89ea885002 100644
--- a/api/configs/middleware/storage/volcengine_tos_storage_config.py
+++ b/api/configs/middleware/storage/volcengine_tos_storage_config.py
@@ -5,30 +5,30 @@ from pydantic import BaseModel, Field
class VolcengineTOSStorageConfig(BaseModel):
"""
- Volcengine tos storage configs
+ Configuration settings for Volcengine Tinder Object Storage (TOS)
"""
VOLCENGINE_TOS_BUCKET_NAME: Optional[str] = Field(
- description="Volcengine TOS Bucket Name",
+ description="Name of the Volcengine TOS bucket to store and retrieve objects (e.g., 'my-tos-bucket')",
default=None,
)
VOLCENGINE_TOS_ACCESS_KEY: Optional[str] = Field(
- description="Volcengine TOS Access Key",
+ description="Access Key ID for authenticating with Volcengine TOS",
default=None,
)
VOLCENGINE_TOS_SECRET_KEY: Optional[str] = Field(
- description="Volcengine TOS Secret Key",
+ description="Secret Access Key for authenticating with Volcengine TOS",
default=None,
)
VOLCENGINE_TOS_ENDPOINT: Optional[str] = Field(
- description="Volcengine TOS Endpoint URL",
+ description="URL of the Volcengine TOS endpoint (e.g., 'https://tos-cn-beijing.volces.com')",
default=None,
)
VOLCENGINE_TOS_REGION: Optional[str] = Field(
- description="Volcengine TOS Region",
+ description="Volcengine region where the TOS bucket is located (e.g., 'cn-beijing')",
default=None,
)
diff --git a/api/configs/middleware/vdb/analyticdb_config.py b/api/configs/middleware/vdb/analyticdb_config.py
index 04f5b0e5bf..247a8ea555 100644
--- a/api/configs/middleware/vdb/analyticdb_config.py
+++ b/api/configs/middleware/vdb/analyticdb_config.py
@@ -5,33 +5,38 @@ from pydantic import BaseModel, Field
class AnalyticdbConfig(BaseModel):
"""
- Configuration for connecting to AnalyticDB.
+ Configuration for connecting to Alibaba Cloud AnalyticDB for PostgreSQL.
Refer to the following documentation for details on obtaining credentials:
https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/getting-started/create-an-instance-instances-with-vector-engine-optimization-enabled
"""
ANALYTICDB_KEY_ID: Optional[str] = Field(
- default=None, description="The Access Key ID provided by Alibaba Cloud for authentication."
+ default=None, description="The Access Key ID provided by Alibaba Cloud for API authentication."
)
ANALYTICDB_KEY_SECRET: Optional[str] = Field(
- default=None, description="The Secret Access Key corresponding to the Access Key ID for secure access."
+ default=None, description="The Secret Access Key corresponding to the Access Key ID for secure API access."
)
ANALYTICDB_REGION_ID: Optional[str] = Field(
- default=None, description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou')."
+ default=None,
+ description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou', 'ap-southeast-1').",
)
ANALYTICDB_INSTANCE_ID: Optional[str] = Field(
default=None,
- description="The unique identifier of the AnalyticDB instance you want to connect to (e.g., 'gp-ab123456')..",
+ description="The unique identifier of the AnalyticDB instance you want to connect to.",
)
ANALYTICDB_ACCOUNT: Optional[str] = Field(
- default=None, description="The account name used to log in to the AnalyticDB instance."
+ default=None,
+ description="The account name used to log in to the AnalyticDB instance"
+ " (usually the initial account created with the instance).",
)
ANALYTICDB_PASSWORD: Optional[str] = Field(
- default=None, description="The password associated with the AnalyticDB account for authentication."
+ default=None, description="The password associated with the AnalyticDB account for database authentication."
)
ANALYTICDB_NAMESPACE: Optional[str] = Field(
- default=None, description="The namespace within AnalyticDB for schema isolation."
+ default=None, description="The namespace within AnalyticDB for schema isolation (if using namespace feature)."
)
ANALYTICDB_NAMESPACE_PASSWORD: Optional[str] = Field(
- default=None, description="The password for accessing the specified namespace within the AnalyticDB instance."
+ default=None,
+ description="The password for accessing the specified namespace within the AnalyticDB instance"
+ " (if namespace feature is enabled).",
)
diff --git a/api/configs/middleware/vdb/chroma_config.py b/api/configs/middleware/vdb/chroma_config.py
index d386623a56..e83a9902de 100644
--- a/api/configs/middleware/vdb/chroma_config.py
+++ b/api/configs/middleware/vdb/chroma_config.py
@@ -6,35 +6,35 @@ from pydantic_settings import BaseSettings
class ChromaConfig(BaseSettings):
"""
- Chroma configs
+ Configuration settings for Chroma vector database
"""
CHROMA_HOST: Optional[str] = Field(
- description="Chroma host",
+ description="Hostname or IP address of the Chroma server (e.g., 'localhost' or '192.168.1.100')",
default=None,
)
CHROMA_PORT: PositiveInt = Field(
- description="Chroma port",
+ description="Port number on which the Chroma server is listening (default is 8000)",
default=8000,
)
CHROMA_TENANT: Optional[str] = Field(
- description="Chroma database",
+ description="Tenant identifier for multi-tenancy support in Chroma",
default=None,
)
CHROMA_DATABASE: Optional[str] = Field(
- description="Chroma database",
+ description="Name of the Chroma database to connect to",
default=None,
)
CHROMA_AUTH_PROVIDER: Optional[str] = Field(
- description="Chroma authentication provider",
+ description="Authentication provider for Chroma (e.g., 'basic', 'token', or a custom provider)",
default=None,
)
CHROMA_AUTH_CREDENTIALS: Optional[str] = Field(
- description="Chroma authentication credentials",
+ description="Authentication credentials for Chroma (format depends on the auth provider)",
default=None,
)
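
As an illustration only (the exact auth settings depend on the chromadb client version), these fields roughly map onto an HTTP client like so:

```python
import chromadb
from chromadb.config import Settings

client = chromadb.HttpClient(
    host="localhost",                                # CHROMA_HOST
    port=8000,                                       # CHROMA_PORT
    tenant="default_tenant",                         # CHROMA_TENANT
    database="default_database",                     # CHROMA_DATABASE
    settings=Settings(
        chroma_client_auth_provider="token",         # CHROMA_AUTH_PROVIDER
        chroma_client_auth_credentials="secret",     # CHROMA_AUTH_CREDENTIALS
    ),
)
client.heartbeat()
```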
diff --git a/api/configs/middleware/vdb/elasticsearch_config.py b/api/configs/middleware/vdb/elasticsearch_config.py
index 5b6a8fd939..df8182985d 100644
--- a/api/configs/middleware/vdb/elasticsearch_config.py
+++ b/api/configs/middleware/vdb/elasticsearch_config.py
@@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings
class ElasticsearchConfig(BaseSettings):
"""
- Elasticsearch configs
+ Configuration settings for Elasticsearch
"""
ELASTICSEARCH_HOST: Optional[str] = Field(
- description="Elasticsearch host",
+ description="Hostname or IP address of the Elasticsearch server (e.g., 'localhost' or '192.168.1.100')",
default="127.0.0.1",
)
ELASTICSEARCH_PORT: PositiveInt = Field(
- description="Elasticsearch port",
+ description="Port number on which the Elasticsearch server is listening (default is 9200)",
default=9200,
)
ELASTICSEARCH_USERNAME: Optional[str] = Field(
- description="Elasticsearch username",
+ description="Username for authenticating with Elasticsearch (default is 'elastic')",
default="elastic",
)
ELASTICSEARCH_PASSWORD: Optional[str] = Field(
- description="Elasticsearch password",
+ description="Password for authenticating with Elasticsearch (default is 'elastic')",
default="elastic",
)
diff --git a/api/configs/middleware/vdb/milvus_config.py b/api/configs/middleware/vdb/milvus_config.py
index 98d375966a..231cbbbe8f 100644
--- a/api/configs/middleware/vdb/milvus_config.py
+++ b/api/configs/middleware/vdb/milvus_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class MilvusConfig(BaseSettings):
"""
- Milvus configs
+ Configuration settings for Milvus vector database
"""
MILVUS_URI: Optional[str] = Field(
- description="Milvus uri",
+ description="URI for connecting to the Milvus server (e.g., 'http://localhost:19530' or 'https://milvus-instance.example.com:19530')",
default="http://127.0.0.1:19530",
)
MILVUS_TOKEN: Optional[str] = Field(
- description="Milvus token",
+ description="Authentication token for Milvus, if token-based authentication is enabled",
default=None,
)
MILVUS_USER: Optional[str] = Field(
- description="Milvus user",
+ description="Username for authenticating with Milvus, if username/password authentication is enabled",
default=None,
)
MILVUS_PASSWORD: Optional[str] = Field(
- description="Milvus password",
+ description="Password for authenticating with Milvus, if username/password authentication is enabled",
default=None,
)
MILVUS_DATABASE: str = Field(
- description="Milvus database, default to `default`",
+ description="Name of the Milvus database to connect to (default is 'default')",
default="default",
)
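
A minimal, assumed mapping of these fields onto the pymilvus MilvusClient constructor (not part of this diff):

```python
from pymilvus import MilvusClient

client = MilvusClient(
    uri="http://127.0.0.1:19530",  # MILVUS_URI
    token="",                      # MILVUS_TOKEN (when token auth is enabled)
    user="",                       # MILVUS_USER
    password="",                   # MILVUS_PASSWORD
    db_name="default",             # MILVUS_DATABASE
)
```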
diff --git a/api/configs/middleware/vdb/myscale_config.py b/api/configs/middleware/vdb/myscale_config.py
index 6451d26e1c..5896c19d27 100644
--- a/api/configs/middleware/vdb/myscale_config.py
+++ b/api/configs/middleware/vdb/myscale_config.py
@@ -3,35 +3,35 @@ from pydantic import BaseModel, Field, PositiveInt
class MyScaleConfig(BaseModel):
"""
- MyScale configs
+ Configuration settings for MyScale vector database
"""
MYSCALE_HOST: str = Field(
- description="MyScale host",
+ description="Hostname or IP address of the MyScale server (e.g., 'localhost' or 'myscale.example.com')",
default="localhost",
)
MYSCALE_PORT: PositiveInt = Field(
- description="MyScale port",
+ description="Port number on which the MyScale server is listening (default is 8123)",
default=8123,
)
MYSCALE_USER: str = Field(
- description="MyScale user",
+ description="Username for authenticating with MyScale (default is 'default')",
default="default",
)
MYSCALE_PASSWORD: str = Field(
- description="MyScale password",
+ description="Password for authenticating with MyScale (default is an empty string)",
default="",
)
MYSCALE_DATABASE: str = Field(
- description="MyScale database name",
+ description="Name of the MyScale database to connect to (default is 'default')",
default="default",
)
MYSCALE_FTS_PARAMS: str = Field(
- description="MyScale fts index parameters",
+ description="Additional parameters for MyScale Full Text Search index)",
default="",
)
diff --git a/api/configs/middleware/vdb/opensearch_config.py b/api/configs/middleware/vdb/opensearch_config.py
index 5823dc1433..81dde4c04d 100644
--- a/api/configs/middleware/vdb/opensearch_config.py
+++ b/api/configs/middleware/vdb/opensearch_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class OpenSearchConfig(BaseSettings):
"""
- OpenSearch configs
+ Configuration settings for OpenSearch
"""
OPENSEARCH_HOST: Optional[str] = Field(
- description="OpenSearch host",
+ description="Hostname or IP address of the OpenSearch server (e.g., 'localhost' or 'opensearch.example.com')",
default=None,
)
OPENSEARCH_PORT: PositiveInt = Field(
- description="OpenSearch port",
+ description="Port number on which the OpenSearch server is listening (default is 9200)",
default=9200,
)
OPENSEARCH_USER: Optional[str] = Field(
- description="OpenSearch user",
+ description="Username for authenticating with OpenSearch",
default=None,
)
OPENSEARCH_PASSWORD: Optional[str] = Field(
- description="OpenSearch password",
+ description="Password for authenticating with OpenSearch",
default=None,
)
OPENSEARCH_SECURE: bool = Field(
- description="whether to use SSL connection for OpenSearch",
+ description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)",
default=False,
)
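
For orientation, a hedged opensearch-py sketch (values are placeholders, not from this diff):

```python
from opensearchpy import OpenSearch

client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],  # OPENSEARCH_HOST / OPENSEARCH_PORT
    http_auth=("admin", "admin"),                 # OPENSEARCH_USER / OPENSEARCH_PASSWORD
    use_ssl=False,                                # OPENSEARCH_SECURE
)
client.info()
```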
diff --git a/api/configs/middleware/vdb/oracle_config.py b/api/configs/middleware/vdb/oracle_config.py
index 62614ae870..44e2f13345 100644
--- a/api/configs/middleware/vdb/oracle_config.py
+++ b/api/configs/middleware/vdb/oracle_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class OracleConfig(BaseSettings):
"""
- ORACLE configs
+ Configuration settings for Oracle database
"""
ORACLE_HOST: Optional[str] = Field(
- description="ORACLE host",
+ description="Hostname or IP address of the Oracle database server (e.g., 'localhost' or 'oracle.example.com')",
default=None,
)
ORACLE_PORT: Optional[PositiveInt] = Field(
- description="ORACLE port",
+ description="Port number on which the Oracle database server is listening (default is 1521)",
default=1521,
)
ORACLE_USER: Optional[str] = Field(
- description="ORACLE user",
+ description="Username for authenticating with the Oracle database",
default=None,
)
ORACLE_PASSWORD: Optional[str] = Field(
- description="ORACLE password",
+ description="Password for authenticating with the Oracle database",
default=None,
)
ORACLE_DATABASE: Optional[str] = Field(
- description="ORACLE database",
+ description="Name of the Oracle database or service to connect to (e.g., 'ORCL' or 'pdborcl')",
default=None,
)
diff --git a/api/configs/middleware/vdb/pgvector_config.py b/api/configs/middleware/vdb/pgvector_config.py
index 39a7c1d8d5..395dcaa420 100644
--- a/api/configs/middleware/vdb/pgvector_config.py
+++ b/api/configs/middleware/vdb/pgvector_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class PGVectorConfig(BaseSettings):
"""
- PGVector configs
+ Configuration settings for PGVector (PostgreSQL with vector extension)
"""
PGVECTOR_HOST: Optional[str] = Field(
- description="PGVector host",
+ description="Hostname or IP address of the PostgreSQL server with PGVector extension (e.g., 'localhost')",
default=None,
)
PGVECTOR_PORT: Optional[PositiveInt] = Field(
- description="PGVector port",
+ description="Port number on which the PostgreSQL server is listening (default is 5433)",
default=5433,
)
PGVECTOR_USER: Optional[str] = Field(
- description="PGVector user",
+ description="Username for authenticating with the PostgreSQL database",
default=None,
)
PGVECTOR_PASSWORD: Optional[str] = Field(
- description="PGVector password",
+ description="Password for authenticating with the PostgreSQL database",
default=None,
)
PGVECTOR_DATABASE: Optional[str] = Field(
- description="PGVector database",
+ description="Name of the PostgreSQL database to connect to",
default=None,
)
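
A minimal sketch (assumed) of opening the underlying PostgreSQL connection from these fields with psycopg2:

```python
import psycopg2

conn = psycopg2.connect(
    host="localhost",   # PGVECTOR_HOST
    port=5433,          # PGVECTOR_PORT
    user="postgres",    # PGVECTOR_USER
    password="secret",  # PGVECTOR_PASSWORD
    dbname="dify",      # PGVECTOR_DATABASE
)
```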
diff --git a/api/configs/middleware/vdb/pgvectors_config.py b/api/configs/middleware/vdb/pgvectors_config.py
index c40e5ff921..8d7a4b8d25 100644
--- a/api/configs/middleware/vdb/pgvectors_config.py
+++ b/api/configs/middleware/vdb/pgvectors_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class PGVectoRSConfig(BaseSettings):
"""
- PGVectoRS configs
+ Configuration settings for PGVecto.RS (Rust-based vector extension for PostgreSQL)
"""
PGVECTO_RS_HOST: Optional[str] = Field(
- description="PGVectoRS host",
+ description="Hostname or IP address of the PostgreSQL server with PGVecto.RS extension (e.g., 'localhost')",
default=None,
)
PGVECTO_RS_PORT: Optional[PositiveInt] = Field(
- description="PGVectoRS port",
+ description="Port number on which the PostgreSQL server with PGVecto.RS is listening (default is 5431)",
default=5431,
)
PGVECTO_RS_USER: Optional[str] = Field(
- description="PGVectoRS user",
+ description="Username for authenticating with the PostgreSQL database using PGVecto.RS",
default=None,
)
PGVECTO_RS_PASSWORD: Optional[str] = Field(
- description="PGVectoRS password",
+ description="Password for authenticating with the PostgreSQL database using PGVecto.RS",
default=None,
)
PGVECTO_RS_DATABASE: Optional[str] = Field(
- description="PGVectoRS database",
+ description="Name of the PostgreSQL database with PGVecto.RS extension to connect to",
default=None,
)
diff --git a/api/configs/middleware/vdb/qdrant_config.py b/api/configs/middleware/vdb/qdrant_config.py
index 27f75491c9..b70f624652 100644
--- a/api/configs/middleware/vdb/qdrant_config.py
+++ b/api/configs/middleware/vdb/qdrant_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class QdrantConfig(BaseSettings):
"""
- Qdrant configs
+ Configuration settings for Qdrant vector database
"""
QDRANT_URL: Optional[str] = Field(
- description="Qdrant url",
+ description="URL of the Qdrant server (e.g., 'http://localhost:6333' or 'https://qdrant.example.com')",
default=None,
)
QDRANT_API_KEY: Optional[str] = Field(
- description="Qdrant api key",
+ description="API key for authenticating with the Qdrant server",
default=None,
)
QDRANT_CLIENT_TIMEOUT: NonNegativeInt = Field(
- description="Qdrant client timeout in seconds",
+ description="Timeout in seconds for Qdrant client operations (default is 20 seconds)",
default=20,
)
QDRANT_GRPC_ENABLED: bool = Field(
- description="whether enable grpc support for Qdrant connection",
+ description="Whether to enable gRPC support for Qdrant connection (True for gRPC, False for HTTP)",
default=False,
)
QDRANT_GRPC_PORT: PositiveInt = Field(
- description="Qdrant grpc port",
+ description="Port number for gRPC connection to Qdrant server (default is 6334)",
default=6334,
)
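
These fields correspond one-to-one with the qdrant-client constructor; a hedged sketch with placeholder values:

```python
from qdrant_client import QdrantClient

client = QdrantClient(
    url="http://localhost:6333",  # QDRANT_URL
    api_key=None,                 # QDRANT_API_KEY
    timeout=20,                   # QDRANT_CLIENT_TIMEOUT
    prefer_grpc=False,            # QDRANT_GRPC_ENABLED
    grpc_port=6334,               # QDRANT_GRPC_PORT
)
```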
diff --git a/api/configs/middleware/vdb/relyt_config.py b/api/configs/middleware/vdb/relyt_config.py
index 66b9ecc03f..5ffbea7b19 100644
--- a/api/configs/middleware/vdb/relyt_config.py
+++ b/api/configs/middleware/vdb/relyt_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class RelytConfig(BaseSettings):
"""
- Relyt configs
+ Configuration settings for Relyt database
"""
RELYT_HOST: Optional[str] = Field(
- description="Relyt host",
+ description="Hostname or IP address of the Relyt server (e.g., 'localhost' or 'relyt.example.com')",
default=None,
)
RELYT_PORT: PositiveInt = Field(
- description="Relyt port",
+ description="Port number on which the Relyt server is listening (default is 9200)",
default=9200,
)
RELYT_USER: Optional[str] = Field(
- description="Relyt user",
+ description="Username for authenticating with the Relyt database",
default=None,
)
RELYT_PASSWORD: Optional[str] = Field(
- description="Relyt password",
+ description="Password for authenticating with the Relyt database",
default=None,
)
RELYT_DATABASE: Optional[str] = Field(
- description="Relyt database",
+ description="Name of the Relyt database to connect to (default is 'default')",
default="default",
)
diff --git a/api/configs/middleware/vdb/tencent_vector_config.py b/api/configs/middleware/vdb/tencent_vector_config.py
index 46b4cb6a24..9cf4d07f6f 100644
--- a/api/configs/middleware/vdb/tencent_vector_config.py
+++ b/api/configs/middleware/vdb/tencent_vector_config.py
@@ -6,45 +6,45 @@ from pydantic_settings import BaseSettings
class TencentVectorDBConfig(BaseSettings):
"""
- Tencent Vector configs
+ Configuration settings for Tencent Vector Database
"""
TENCENT_VECTOR_DB_URL: Optional[str] = Field(
- description="Tencent Vector URL",
+ description="URL of the Tencent Vector Database service (e.g., 'https://vectordb.tencentcloudapi.com')",
default=None,
)
TENCENT_VECTOR_DB_API_KEY: Optional[str] = Field(
- description="Tencent Vector API key",
+ description="API key for authenticating with the Tencent Vector Database service",
default=None,
)
TENCENT_VECTOR_DB_TIMEOUT: PositiveInt = Field(
- description="Tencent Vector timeout in seconds",
+ description="Timeout in seconds for Tencent Vector Database operations (default is 30 seconds)",
default=30,
)
TENCENT_VECTOR_DB_USERNAME: Optional[str] = Field(
- description="Tencent Vector username",
+ description="Username for authenticating with the Tencent Vector Database (if required)",
default=None,
)
TENCENT_VECTOR_DB_PASSWORD: Optional[str] = Field(
- description="Tencent Vector password",
+ description="Password for authenticating with the Tencent Vector Database (if required)",
default=None,
)
TENCENT_VECTOR_DB_SHARD: PositiveInt = Field(
- description="Tencent Vector sharding number",
+ description="Number of shards for the Tencent Vector Database (default is 1)",
default=1,
)
TENCENT_VECTOR_DB_REPLICAS: NonNegativeInt = Field(
- description="Tencent Vector replicas",
+ description="Number of replicas for the Tencent Vector Database (default is 2)",
default=2,
)
TENCENT_VECTOR_DB_DATABASE: Optional[str] = Field(
- description="Tencent Vector Database",
+ description="Name of the specific Tencent Vector Database to connect to",
default=None,
)
diff --git a/api/configs/middleware/vdb/tidb_vector_config.py b/api/configs/middleware/vdb/tidb_vector_config.py
index dbcb276c01..bc68be69d8 100644
--- a/api/configs/middleware/vdb/tidb_vector_config.py
+++ b/api/configs/middleware/vdb/tidb_vector_config.py
@@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings
class TiDBVectorConfig(BaseSettings):
"""
- TiDB Vector configs
+ Configuration settings for TiDB Vector database
"""
TIDB_VECTOR_HOST: Optional[str] = Field(
- description="TiDB Vector host",
+ description="Hostname or IP address of the TiDB Vector server (e.g., 'localhost' or 'tidb.example.com')",
default=None,
)
TIDB_VECTOR_PORT: Optional[PositiveInt] = Field(
- description="TiDB Vector port",
+ description="Port number on which the TiDB Vector server is listening (default is 4000)",
default=4000,
)
TIDB_VECTOR_USER: Optional[str] = Field(
- description="TiDB Vector user",
+ description="Username for authenticating with the TiDB Vector database",
default=None,
)
TIDB_VECTOR_PASSWORD: Optional[str] = Field(
- description="TiDB Vector password",
+ description="Password for authenticating with the TiDB Vector database",
default=None,
)
TIDB_VECTOR_DATABASE: Optional[str] = Field(
- description="TiDB Vector database",
+ description="Name of the TiDB Vector database to connect to",
default=None,
)
diff --git a/api/configs/middleware/vdb/weaviate_config.py b/api/configs/middleware/vdb/weaviate_config.py
index 63d1022f6a..25000e8bde 100644
--- a/api/configs/middleware/vdb/weaviate_config.py
+++ b/api/configs/middleware/vdb/weaviate_config.py
@@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings
class WeaviateConfig(BaseSettings):
"""
- Weaviate configs
+ Configuration settings for Weaviate vector database
"""
WEAVIATE_ENDPOINT: Optional[str] = Field(
- description="Weaviate endpoint URL",
+ description="URL of the Weaviate server (e.g., 'http://localhost:8080' or 'https://weaviate.example.com')",
default=None,
)
WEAVIATE_API_KEY: Optional[str] = Field(
- description="Weaviate API key",
+ description="API key for authenticating with the Weaviate server",
default=None,
)
WEAVIATE_GRPC_ENABLED: bool = Field(
- description="whether to enable gRPC for Weaviate connection",
+ description="Whether to enable gRPC for Weaviate connection (True for gRPC, False for HTTP)",
default=True,
)
WEAVIATE_BATCH_SIZE: PositiveInt = Field(
- description="Weaviate batch size",
+ description="Number of objects to be processed in a single batch operation (default is 100)",
default=100,
)
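
A hedged sketch assuming the v3 weaviate-client API (not part of this diff):

```python
import weaviate

client = weaviate.Client(
    url="http://localhost:8080",                            # WEAVIATE_ENDPOINT
    auth_client_secret=weaviate.AuthApiKey(api_key="..."),  # WEAVIATE_API_KEY (placeholder)
)
client.batch.configure(batch_size=100)                      # WEAVIATE_BATCH_SIZE
```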
diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py
index 3815a6fca2..c752660122 100644
--- a/api/configs/packaging/__init__.py
+++ b/api/configs/packaging/__init__.py
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
- default="0.8.2",
+ default="0.8.3",
)
COMMIT_SHA: str = Field(
diff --git a/api/constants/__init__.py b/api/constants/__init__.py
index e22c3268ef..75eaf81638 100644
--- a/api/constants/__init__.py
+++ b/api/constants/__init__.py
@@ -1 +1,2 @@
HIDDEN_VALUE = "[__HIDDEN__]"
+UUID_NIL = "00000000-0000-0000-0000-000000000000"
diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py
index 53de51c24d..d3296d3dff 100644
--- a/api/controllers/console/app/completion.py
+++ b/api/controllers/console/app/completion.py
@@ -109,6 +109,7 @@ class ChatMessageApi(Resource):
parser.add_argument("files", type=list, required=False, location="json")
parser.add_argument("model_config", type=dict, required=True, location="json")
parser.add_argument("conversation_id", type=uuid_value, location="json")
+ parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
args = parser.parse_args()
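
The new optional `parent_message_id` lets a chat request branch off an earlier message instead of the latest reply. A hypothetical request body (values are placeholders, not from this diff):

```python
payload = {
    "query": "Can you rephrase that answer?",
    "inputs": {},
    "response_mode": "streaming",
    "conversation_id": "11111111-2222-3333-4444-555555555555",
    # regenerate / branch from this earlier message rather than the newest one
    "parent_message_id": "66666666-7777-8888-9999-000000000000",
    # other required fields (e.g. model_config for the console API) omitted for brevity
}
```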
diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py
index fe06201982..2fba3e0af0 100644
--- a/api/controllers/console/app/message.py
+++ b/api/controllers/console/app/message.py
@@ -105,8 +105,6 @@ class ChatMessageListApi(Resource):
if rest_count > 0:
has_more = True
- history_messages = list(reversed(history_messages))
-
return InfiniteScrollPagination(data=history_messages, limit=args["limit"], has_more=has_more)
diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py
index b488deb89d..0a693b84e2 100644
--- a/api/controllers/console/app/workflow.py
+++ b/api/controllers/console/app/workflow.py
@@ -166,6 +166,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
parser.add_argument("query", type=str, required=True, location="json", default="")
parser.add_argument("files", type=list, location="json")
parser.add_argument("conversation_id", type=uuid_value, location="json")
+ parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
+
args = parser.parse_args()
try:
diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py
index f464692098..125bc1af8c 100644
--- a/api/controllers/console/explore/completion.py
+++ b/api/controllers/console/explore/completion.py
@@ -100,6 +100,7 @@ class ChatApi(InstalledAppResource):
parser.add_argument("query", type=str, required=True, location="json")
parser.add_argument("files", type=list, required=False, location="json")
parser.add_argument("conversation_id", type=uuid_value, location="json")
+ parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
args = parser.parse_args()
diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py
index 0e0238556c..3d221ff30a 100644
--- a/api/controllers/console/explore/message.py
+++ b/api/controllers/console/explore/message.py
@@ -51,7 +51,7 @@ class MessageListApi(InstalledAppResource):
try:
return MessageService.pagination_by_first_id(
- app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
+ app_model, current_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py
index e54e6f4903..a70ee89b5e 100644
--- a/api/controllers/service_api/app/message.py
+++ b/api/controllers/service_api/app/message.py
@@ -54,6 +54,7 @@ class MessageListApi(Resource):
message_fields = {
"id": fields.String,
"conversation_id": fields.String,
+ "parent_message_id": fields.String,
"inputs": fields.Raw,
"query": fields.String,
"answer": fields.String(attribute="re_sign_file_url_answer"),
diff --git a/api/controllers/web/completion.py b/api/controllers/web/completion.py
index 115492b796..45b890dfc4 100644
--- a/api/controllers/web/completion.py
+++ b/api/controllers/web/completion.py
@@ -96,6 +96,7 @@ class ChatApi(WebApiResource):
parser.add_argument("files", type=list, required=False, location="json")
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
parser.add_argument("conversation_id", type=uuid_value, location="json")
+ parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
parser.add_argument("retriever_from", type=str, required=False, default="web_app", location="json")
args = parser.parse_args()
diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py
index 0d4047f4ef..2d2a5866c8 100644
--- a/api/controllers/web/message.py
+++ b/api/controllers/web/message.py
@@ -57,6 +57,7 @@ class MessageListApi(WebApiResource):
message_fields = {
"id": fields.String,
"conversation_id": fields.String,
+ "parent_message_id": fields.String,
"inputs": fields.Raw,
"query": fields.String,
"answer": fields.String(attribute="re_sign_file_url_answer"),
@@ -89,7 +90,7 @@ class MessageListApi(WebApiResource):
try:
return MessageService.pagination_by_first_id(
- app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
+ app_model, end_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py
index d09a9956a4..5295f97bdb 100644
--- a/api/core/agent/base_agent_runner.py
+++ b/api/core/agent/base_agent_runner.py
@@ -32,6 +32,7 @@ from core.model_runtime.entities.message_entities import (
from core.model_runtime.entities.model_entities import ModelFeature
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.utils.encoders import jsonable_encoder
+from core.prompt.utils.extract_thread_messages import extract_thread_messages
from core.tools.entities.tool_entities import (
ToolParameter,
ToolRuntimeVariablePool,
@@ -441,10 +442,12 @@ class BaseAgentRunner(AppRunner):
.filter(
Message.conversation_id == self.message.conversation_id,
)
- .order_by(Message.created_at.asc())
+ .order_by(Message.created_at.desc())
.all()
)
+ messages = list(reversed(extract_thread_messages(messages)))
+
for message in messages:
if message.id == self.message.id:
continue
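
The newly imported extract_thread_messages helper is not shown in this diff; below is a hedged sketch of the idea it implements, given messages ordered newest-first and linked by parent_message_id (the real helper may treat legacy messages without a parent differently):

```python
UUID_NIL = "00000000-0000-0000-0000-000000000000"  # mirrors the constant added in api/constants


def extract_thread_messages(messages):
    """Keep only the thread ending at the newest message by walking parent_message_id."""
    thread = []
    next_id = None
    for message in messages:  # newest first
        if next_id is not None and message.id != next_id:
            continue  # belongs to a sibling branch of the conversation, skip it
        thread.append(message)
        next_id = message.parent_message_id
        if not next_id or next_id == UUID_NIL:
            break  # reached the start of the thread
    return thread  # still newest-first; callers reverse it before building prompts
```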
diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py
index 88e1256ed5..445ef6d0ab 100644
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@@ -121,6 +121,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config),
query=query,
files=file_objs,
+ parent_message_id=args.get("parent_message_id"),
user_id=user.id,
stream=stream,
invoke_from=invoke_from,
diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py
index abf8a332ab..99abccf4f9 100644
--- a/api/core/app/apps/agent_chat/app_generator.py
+++ b/api/core/app/apps/agent_chat/app_generator.py
@@ -127,6 +127,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config),
query=query,
files=file_objs,
+ parent_message_id=args.get("parent_message_id"),
user_id=user.id,
stream=stream,
invoke_from=invoke_from,
diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py
index c6855ac854..62e79ec444 100644
--- a/api/core/app/apps/base_app_generate_response_converter.py
+++ b/api/core/app/apps/base_app_generate_response_converter.py
@@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
:return:
"""
# show_retrieve_source
+ updated_resources = []
if "retriever_resources" in metadata:
- metadata["retriever_resources"] = []
for resource in metadata["retriever_resources"]:
- metadata["retriever_resources"].append(
+ updated_resources.append(
{
"segment_id": resource["segment_id"],
"position": resource["position"],
@@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
"content": resource["content"],
}
)
+ metadata["retriever_resources"] = updated_resources
# show annotation reply
if "annotation_reply" in metadata:
diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py
index 032556ec4c..9ef1366a0f 100644
--- a/api/core/app/apps/chat/app_generator.py
+++ b/api/core/app/apps/chat/app_generator.py
@@ -128,6 +128,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):
inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config),
query=query,
files=file_objs,
+ parent_message_id=args.get("parent_message_id"),
user_id=user.id,
stream=stream,
invoke_from=invoke_from,
diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py
index c4db95cbd0..65b759acf5 100644
--- a/api/core/app/apps/message_based_app_generator.py
+++ b/api/core/app/apps/message_based_app_generator.py
@@ -218,6 +218,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
answer_tokens=0,
answer_unit_price=0,
answer_price_unit=0,
+ parent_message_id=getattr(application_generate_entity, "parent_message_id", None),
provider_response_latency=0,
total_price=0,
currency="USD",
diff --git a/api/core/app/entities/app_invoke_entities.py b/api/core/app/entities/app_invoke_entities.py
index ab8d4e374e..87ca51ef1b 100644
--- a/api/core/app/entities/app_invoke_entities.py
+++ b/api/core/app/entities/app_invoke_entities.py
@@ -122,6 +122,7 @@ class ChatAppGenerateEntity(EasyUIBasedAppGenerateEntity):
"""
conversation_id: Optional[str] = None
+ parent_message_id: Optional[str] = None
class CompletionAppGenerateEntity(EasyUIBasedAppGenerateEntity):
@@ -138,6 +139,7 @@ class AgentChatAppGenerateEntity(EasyUIBasedAppGenerateEntity):
"""
conversation_id: Optional[str] = None
+ parent_message_id: Optional[str] = None
class AdvancedChatAppGenerateEntity(AppGenerateEntity):
@@ -149,6 +151,7 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity):
app_config: WorkflowUIBasedAppConfig
conversation_id: Optional[str] = None
+ parent_message_id: Optional[str] = None
query: str
class SingleIterationRunEntity(BaseModel):
diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py
index 78a6d6e683..39bd6fee69 100644
--- a/api/core/llm_generator/llm_generator.py
+++ b/api/core/llm_generator/llm_generator.py
@@ -47,6 +47,8 @@ class LLMGenerator:
)
answer = response.message.content
cleaned_answer = re.sub(r"^.*(\{.*\}).*$", r"\1", answer, flags=re.DOTALL)
+ if cleaned_answer is None:
+ return ""
result_dict = json.loads(cleaned_answer)
answer = result_dict["Your Output"]
name = answer.strip()
diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py
index c40b6d1808..e5b6784516 100644
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
- "(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)
diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py
index d3185c3b11..60b36c50f0 100644
--- a/api/core/memory/token_buffer_memory.py
+++ b/api/core/memory/token_buffer_memory.py
@@ -11,6 +11,7 @@ from core.model_runtime.entities.message_entities import (
TextPromptMessageContent,
UserPromptMessage,
)
+from core.prompt.utils.extract_thread_messages import extract_thread_messages
from extensions.ext_database import db
from models.model import AppMode, Conversation, Message, MessageFile
from models.workflow import WorkflowRun
@@ -33,8 +34,17 @@ class TokenBufferMemory:
# fetch limited messages, and return reversed
query = (
- db.session.query(Message.id, Message.query, Message.answer, Message.created_at, Message.workflow_run_id)
- .filter(Message.conversation_id == self.conversation.id, Message.answer != "")
+ db.session.query(
+ Message.id,
+ Message.query,
+ Message.answer,
+ Message.created_at,
+ Message.workflow_run_id,
+ Message.parent_message_id,
+ )
+ .filter(
+ Message.conversation_id == self.conversation.id,
+ )
.order_by(Message.created_at.desc())
)
@@ -45,7 +55,12 @@ class TokenBufferMemory:
messages = query.limit(message_limit).all()
- messages = list(reversed(messages))
+ # instead of all messages from the conversation, we only need to extract the messages
+ # that belong to the thread of the last message
+ thread_messages = extract_thread_messages(messages)
+ thread_messages.pop(0)
+ messages = list(reversed(thread_messages))
+
message_file_parser = MessageFileParser(tenant_id=app_record.tenant_id, app_id=app_record.id)
prompt_messages = []
for message in messages:
diff --git a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md
index 56f379a92f..17fc088a63 100644
--- a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md
+++ b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md
@@ -62,7 +62,7 @@ pricing: # 价格信息
建议将所有模型配置都准备完毕后再开始模型代码的实现。
-同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#AIModel)。
+同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#aimodelentity)。
### 实现模型调用代码
diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index d10314ba03..1f5f64019a 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -37,3 +37,4 @@
- siliconflow
- perfxcloud
- zhinao
+- fireworks
diff --git a/api/core/model_runtime/model_providers/fireworks/__init__.py b/api/core/model_runtime/model_providers/fireworks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg
new file mode 100644
index 0000000000..582605cc42
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg
@@ -0,0 +1,3 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg
new file mode 100644
index 0000000000..86eeba66f9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg
@@ -0,0 +1,5 @@
+
diff --git a/api/core/model_runtime/model_providers/fireworks/_common.py b/api/core/model_runtime/model_providers/fireworks/_common.py
new file mode 100644
index 0000000000..378ced3a40
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/_common.py
@@ -0,0 +1,52 @@
+from collections.abc import Mapping
+
+import openai
+
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonFireworks:
+ def _to_credential_kwargs(self, credentials: Mapping) -> dict:
+ """
+ Transform credentials to kwargs for model instance
+
+ :param credentials:
+ :return:
+ """
+ credentials_kwargs = {
+ "api_key": credentials["fireworks_api_key"],
+ "base_url": "https://api.fireworks.ai/inference/v1",
+ "max_retries": 1,
+ }
+
+ return credentials_kwargs
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+ InvokeServerUnavailableError: [openai.InternalServerError],
+ InvokeRateLimitError: [openai.RateLimitError],
+ InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
+ InvokeBadRequestError: [
+ openai.BadRequestError,
+ openai.NotFoundError,
+ openai.UnprocessableEntityError,
+ openai.APIError,
+ ],
+ }
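
A usage sketch (assumed, not part of this diff): the kwargs returned by _to_credential_kwargs are intended for the OpenAI-compatible client pointed at the Fireworks inference endpoint; the API key below is a placeholder:

```python
from openai import OpenAI

credentials = {"fireworks_api_key": "fw-..."}           # hypothetical key
kwargs = _CommonFireworks()._to_credential_kwargs(credentials)
client = OpenAI(**kwargs)                               # api_key, base_url, max_retries
client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    messages=[{"role": "user", "content": "ping"}],
)
```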
diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.py b/api/core/model_runtime/model_providers/fireworks/fireworks.py
new file mode 100644
index 0000000000..15f25badab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.py
@@ -0,0 +1,27 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class FireworksProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+        if validation fails, raise an exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.LLM)
+ model_instance.validate_credentials(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct", credentials=credentials
+ )
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+            logger.exception(f"{self.get_provider_schema().provider} credentials validation failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
new file mode 100644
index 0000000000..f886fa23b5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@@ -0,0 +1,29 @@
+provider: fireworks
+label:
+ zh_Hans: Fireworks AI
+ en_US: Fireworks AI
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FCFDFF"
+help:
+ title:
+ en_US: Get your API Key from Fireworks AI
+ zh_Hans: 从 Fireworks AI 获取 API Key
+ url:
+ en_US: https://fireworks.ai/account/api-keys
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: fireworks_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/__init__.py b/api/core/model_runtime/model_providers/fireworks/llm/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml
new file mode 100644
index 0000000000..9f7c1af68c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml
@@ -0,0 +1,16 @@
+- llama-v3p1-405b-instruct
+- llama-v3p1-70b-instruct
+- llama-v3p1-8b-instruct
+- llama-v3-70b-instruct
+- mixtral-8x22b-instruct
+- mixtral-8x7b-instruct
+- firefunction-v2
+- firefunction-v1
+- gemma2-9b-it
+- llama-v3-70b-instruct-hf
+- llama-v3-8b-instruct
+- llama-v3-8b-instruct-hf
+- mixtral-8x7b-instruct-hf
+- mythomax-l2-13b
+- phi-3-vision-128k-instruct
+- yi-large
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml
new file mode 100644
index 0000000000..f6bac12832
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/firefunction-v1
+label:
+ zh_Hans: Firefunction V1
+ en_US: Firefunction V1
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml
new file mode 100644
index 0000000000..2979cb46d5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/firefunction-v2
+label:
+ zh_Hans: Firefunction V2
+ en_US: Firefunction V2
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml
new file mode 100644
index 0000000000..ee41a7e2fd
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml
@@ -0,0 +1,45 @@
+model: accounts/fireworks/models/gemma2-9b-it
+label:
+ zh_Hans: Gemma2 9B Instruct
+ en_US: Gemma2 9B Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml
new file mode 100644
index 0000000000..2ae89b8816
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-70b-instruct-hf
+label:
+ zh_Hans: Llama3 70B Instruct (HF version)
+ en_US: Llama3 70B Instruct (HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml
new file mode 100644
index 0000000000..7c24b08ca5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-70b-instruct
+label:
+ zh_Hans: Llama3 70B Instruct
+ en_US: Llama3 70B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml
new file mode 100644
index 0000000000..83507ef3e5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-8b-instruct-hf
+label:
+ zh_Hans: Llama3 8B Instruct (HF version)
+ en_US: Llama3 8B Instruct (HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml
new file mode 100644
index 0000000000..d8ac9537b8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-8b-instruct
+label:
+ zh_Hans: Llama3 8B Instruct
+ en_US: Llama3 8B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml
new file mode 100644
index 0000000000..c4ddb3e924
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-405b-instruct
+label:
+ zh_Hans: Llama3.1 405B Instruct
+ en_US: Llama3.1 405B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '3'
+ output: '3'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml
new file mode 100644
index 0000000000..62f84f87fa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-70b-instruct
+label:
+ zh_Hans: Llama3.1 70B Instruct
+ en_US: Llama3.1 70B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
new file mode 100644
index 0000000000..9bb99c91b6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-8b-instruct
+label:
+ zh_Hans: Llama3.1 8B Instruct
+ en_US: Llama3.1 8B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llm.py b/api/core/model_runtime/model_providers/fireworks/llm/llm.py
new file mode 100644
index 0000000000..2dcf1adba6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llm.py
@@ -0,0 +1,610 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+ AssistantPromptMessage,
+ ImagePromptMessageContent,
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageTool,
+ SystemPromptMessage,
+ TextPromptMessageContent,
+ ToolPromptMessage,
+ UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+logger = logging.getLogger(__name__)
+
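+# Prompt prefix used in structured-output (code block) mode; {{block}} is replaced with the
+# requested format (JSON/XML) and {{instructions}} with the original system instructions.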
+FIREWORKS_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+
+
+{{instructions}}
+
+""" # noqa: E501
+
+
+class FireworksLargeLanguageModel(_CommonFireworks, LargeLanguageModel):
+ """
+ Model class for Fireworks large language model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Invoke large language model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param tools: tools for tool calling
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+
+ return self._chat_generate(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ )
+
+ def _code_block_mode_wrapper(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ callbacks: Optional[list[Callback]] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Code block mode wrapper for invoking large language model
+ """
+ if "response_format" in model_parameters and model_parameters["response_format"] in {"JSON", "XML"}:
+ stop = stop or []
+ self._transform_chat_json_prompts(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ response_format=model_parameters["response_format"],
+ )
+ model_parameters.pop("response_format")
+
+ return self._invoke(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ )
+
+ def _transform_chat_json_prompts(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: list[PromptMessageTool] | None = None,
+ stop: list[str] | None = None,
+ stream: bool = True,
+ user: str | None = None,
+ response_format: str = "JSON",
+ ) -> None:
+ """
+ Transform json prompts
+ """
+ if stop is None:
+ stop = []
+ if "```\n" not in stop:
+ stop.append("```\n")
+ if "\n```" not in stop:
+ stop.append("\n```")
+
+ if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+ prompt_messages[0] = SystemPromptMessage(
+ content=FIREWORKS_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace(
+ "{{block}}", response_format
+ )
+ )
+ prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n"))
+ else:
+ prompt_messages.insert(
+ 0,
+ SystemPromptMessage(
+ content=FIREWORKS_BLOCK_MODE_PROMPT.replace(
+ "{{instructions}}", f"Please output a valid {response_format} object."
+ ).replace("{{block}}", response_format)
+ ),
+ )
+ prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
+
+ def get_num_tokens(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None,
+ ) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return:
+ """
+ return self._num_tokens_from_messages(model, prompt_messages, tools)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ client.chat.completions.create(
+ messages=[{"role": "user", "content": "ping"}], model=model, temperature=0, max_tokens=10, stream=False
+ )
+ except Exception as e:
+ raise CredentialsValidateFailedError(str(e))
+
+ def _chat_generate(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+
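+        # Tools are forwarded through the legacy OpenAI-compatible "functions" field.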
+ if tools:
+ extra_model_kwargs["functions"] = [
+ {"name": tool.name, "description": tool.description, "parameters": tool.parameters} for tool in tools
+ ]
+
+ if stop:
+ extra_model_kwargs["stop"] = stop
+
+ if user:
+ extra_model_kwargs["user"] = user
+
+ # chat model
+ response = client.chat.completions.create(
+ messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
+ model=model,
+ stream=stream,
+ **model_parameters,
+ **extra_model_kwargs,
+ )
+
+ if stream:
+ return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
+ return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+
+ def _handle_chat_generate_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: ChatCompletion,
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None,
+ ) -> LLMResult:
+ """
+ Handle llm chat response
+
+ :param model: model name
+ :param credentials: credentials
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response
+ """
+ assistant_message = response.choices[0].message
+ # assistant_message_tool_calls = assistant_message.tool_calls
+ assistant_message_function_call = assistant_message.function_call
+
+ # extract tool calls from response
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(assistant_message_function_call)
+ tool_calls = [function_call] if function_call else []
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(content=assistant_message.content, tool_calls=tool_calls)
+
+ # calculate num tokens
+ if response.usage:
+ # transform usage
+ prompt_tokens = response.usage.prompt_tokens
+ completion_tokens = response.usage.completion_tokens
+ else:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+ completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message])
+
+ # transform usage
+ usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+ # transform response
+ response = LLMResult(
+ model=response.model,
+ prompt_messages=prompt_messages,
+ message=assistant_prompt_message,
+ usage=usage,
+ system_fingerprint=response.system_fingerprint,
+ )
+
+ return response
+
+ def _handle_chat_generate_stream_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: Stream[ChatCompletionChunk],
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None,
+ ) -> Generator:
+ """
+ Handle llm chat stream response
+
+ :param model: model name
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response chunk generator
+ """
+ full_assistant_content = ""
+ delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
+ prompt_tokens = 0
+ completion_tokens = 0
+ final_tool_calls = []
+ final_chunk = LLMResultChunk(
+ model=model,
+ prompt_messages=prompt_messages,
+ delta=LLMResultChunkDelta(
+ index=0,
+ message=AssistantPromptMessage(content=""),
+ ),
+ )
+
+ for chunk in response:
+ if len(chunk.choices) == 0:
+ if chunk.usage:
+ # calculate num tokens
+ prompt_tokens = chunk.usage.prompt_tokens
+ completion_tokens = chunk.usage.completion_tokens
+ continue
+
+ delta = chunk.choices[0]
+ has_finish_reason = delta.finish_reason is not None
+
+ if (
+ not has_finish_reason
+ and (delta.delta.content is None or delta.delta.content == "")
+ and delta.delta.function_call is None
+ ):
+ continue
+
+ # assistant_message_tool_calls = delta.delta.tool_calls
+ assistant_message_function_call = delta.delta.function_call
+
+ # extract tool calls from response
+ if delta_assistant_message_function_call_storage is not None:
+ # handle process of stream function call
+ if assistant_message_function_call:
+                    # message has not ended yet
+ delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments
+ continue
+ else:
+ # message has ended
+ assistant_message_function_call = delta_assistant_message_function_call_storage
+ delta_assistant_message_function_call_storage = None
+ else:
+ if assistant_message_function_call:
+ # start of stream function call
+ delta_assistant_message_function_call_storage = assistant_message_function_call
+ if delta_assistant_message_function_call_storage.arguments is None:
+ delta_assistant_message_function_call_storage.arguments = ""
+ if not has_finish_reason:
+ continue
+
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(assistant_message_function_call)
+ tool_calls = [function_call] if function_call else []
+ if tool_calls:
+ final_tool_calls.extend(tool_calls)
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(content=delta.delta.content or "", tool_calls=tool_calls)
+
+ full_assistant_content += delta.delta.content or ""
+
+ if has_finish_reason:
+ final_chunk = LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ finish_reason=delta.finish_reason,
+ ),
+ )
+ else:
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ ),
+ )
+
+ if not prompt_tokens:
+ prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+
+ if not completion_tokens:
+ full_assistant_prompt_message = AssistantPromptMessage(
+ content=full_assistant_content, tool_calls=final_tool_calls
+ )
+ completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message])
+
+ # transform usage
+ usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+ final_chunk.delta.usage = usage
+
+ yield final_chunk
+
+ def _extract_response_tool_calls(
+ self, response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]
+ ) -> list[AssistantPromptMessage.ToolCall]:
+ """
+ Extract tool calls from response
+
+ :param response_tool_calls: response tool calls
+ :return: list of tool calls
+ """
+ tool_calls = []
+ if response_tool_calls:
+ for response_tool_call in response_tool_calls:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_tool_call.function.name, arguments=response_tool_call.function.arguments
+ )
+
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_tool_call.id, type=response_tool_call.type, function=function
+ )
+ tool_calls.append(tool_call)
+
+ return tool_calls
+
+ def _extract_response_function_call(
+ self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall
+ ) -> AssistantPromptMessage.ToolCall:
+ """
+ Extract function call from response
+
+ :param response_function_call: response function call
+ :return: tool call
+ """
+ tool_call = None
+ if response_function_call:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_function_call.name, arguments=response_function_call.arguments
+ )
+
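+            # The legacy function_call payload carries no id, so the function name doubles as the tool call id.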
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_function_call.name, type="function", function=function
+ )
+
+ return tool_call
+
+ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+ """
+ Convert PromptMessage to dict for Fireworks API
+ """
+ if isinstance(message, UserPromptMessage):
+ message = cast(UserPromptMessage, message)
+ if isinstance(message.content, str):
+ message_dict = {"role": "user", "content": message.content}
+ else:
+ sub_messages = []
+ for message_content in message.content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ sub_message_dict = {"type": "text", "text": message_content.data}
+ sub_messages.append(sub_message_dict)
+ elif message_content.type == PromptMessageContentType.IMAGE:
+ message_content = cast(ImagePromptMessageContent, message_content)
+ sub_message_dict = {
+ "type": "image_url",
+ "image_url": {"url": message_content.data, "detail": message_content.detail.value},
+ }
+ sub_messages.append(sub_message_dict)
+
+ message_dict = {"role": "user", "content": sub_messages}
+ elif isinstance(message, AssistantPromptMessage):
+ message = cast(AssistantPromptMessage, message)
+ message_dict = {"role": "assistant", "content": message.content}
+ if message.tool_calls:
+ # message_dict["tool_calls"] = [tool_call.dict() for tool_call in
+ # message.tool_calls]
+ function_call = message.tool_calls[0]
+ message_dict["function_call"] = {
+ "name": function_call.function.name,
+ "arguments": function_call.function.arguments,
+ }
+ elif isinstance(message, SystemPromptMessage):
+ message = cast(SystemPromptMessage, message)
+ message_dict = {"role": "system", "content": message.content}
+ elif isinstance(message, ToolPromptMessage):
+ message = cast(ToolPromptMessage, message)
+ # message_dict = {
+ # "role": "tool",
+ # "content": message.content,
+ # "tool_call_id": message.tool_call_id
+ # }
+ message_dict = {"role": "function", "content": message.content, "name": message.tool_call_id}
+ else:
+ raise ValueError(f"Got unknown type {message}")
+
+ if message.name:
+ message_dict["name"] = message.name
+
+ return message_dict
+
+ def _num_tokens_from_messages(
+ self,
+ model: str,
+ messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None,
+        credentials: Optional[dict] = None,
+ ) -> int:
+ """
+ Approximate num tokens with GPT2 tokenizer.
+ """
+
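+        # Per-message overhead constants follow the GPT-style chat token accounting heuristic:
+        # roughly 3 tokens of scaffolding per message, plus 1 token when a "name" field is present.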
+ tokens_per_message = 3
+ tokens_per_name = 1
+
+ num_tokens = 0
+ messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
+ for message in messages_dict:
+ num_tokens += tokens_per_message
+ for key, value in message.items():
+ # Cast str(value) in case the message value is not a string
+ # This occurs with function messages
+                # TODO: Token counting for image content is not implemented yet; it would require
+                # downloading the image to determine its resolution, which would add request latency.
+ if isinstance(value, list):
+ text = ""
+ for item in value:
+ if isinstance(item, dict) and item["type"] == "text":
+ text += item["text"]
+
+ value = text
+
+ if key == "tool_calls":
+ for tool_call in value:
+ for t_key, t_value in tool_call.items():
+ num_tokens += self._get_num_tokens_by_gpt2(t_key)
+ if t_key == "function":
+ for f_key, f_value in t_value.items():
+ num_tokens += self._get_num_tokens_by_gpt2(f_key)
+ num_tokens += self._get_num_tokens_by_gpt2(f_value)
+ else:
+ num_tokens += self._get_num_tokens_by_gpt2(t_key)
+ num_tokens += self._get_num_tokens_by_gpt2(t_value)
+ else:
+ num_tokens += self._get_num_tokens_by_gpt2(str(value))
+
+ if key == "name":
+ num_tokens += tokens_per_name
+
+ # every reply is primed with assistant
+ num_tokens += 3
+
+ if tools:
+ num_tokens += self._num_tokens_for_tools(tools)
+
+ return num_tokens
+
+ def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int:
+ """
+        Approximate num tokens for tool calling with the GPT-2 tokenizer.
+
+ :param tools: tools for tool calling
+ :return: number of tokens
+ """
+ num_tokens = 0
+ for tool in tools:
+ num_tokens += self._get_num_tokens_by_gpt2("type")
+ num_tokens += self._get_num_tokens_by_gpt2("function")
+ num_tokens += self._get_num_tokens_by_gpt2("function")
+
+ # calculate num tokens for function object
+ num_tokens += self._get_num_tokens_by_gpt2("name")
+ num_tokens += self._get_num_tokens_by_gpt2(tool.name)
+ num_tokens += self._get_num_tokens_by_gpt2("description")
+ num_tokens += self._get_num_tokens_by_gpt2(tool.description)
+ parameters = tool.parameters
+ num_tokens += self._get_num_tokens_by_gpt2("parameters")
+ if "title" in parameters:
+ num_tokens += self._get_num_tokens_by_gpt2("title")
+ num_tokens += self._get_num_tokens_by_gpt2(parameters.get("title"))
+ num_tokens += self._get_num_tokens_by_gpt2("type")
+ num_tokens += self._get_num_tokens_by_gpt2(parameters.get("type"))
+ if "properties" in parameters:
+ num_tokens += self._get_num_tokens_by_gpt2("properties")
+ for key, value in parameters.get("properties").items():
+ num_tokens += self._get_num_tokens_by_gpt2(key)
+ for field_key, field_value in value.items():
+ num_tokens += self._get_num_tokens_by_gpt2(field_key)
+ if field_key == "enum":
+ for enum_field in field_value:
+ num_tokens += 3
+ num_tokens += self._get_num_tokens_by_gpt2(enum_field)
+ else:
+ num_tokens += self._get_num_tokens_by_gpt2(field_key)
+ num_tokens += self._get_num_tokens_by_gpt2(str(field_value))
+ if "required" in parameters:
+ num_tokens += self._get_num_tokens_by_gpt2("required")
+ for required_field in parameters["required"]:
+ num_tokens += 3
+ num_tokens += self._get_num_tokens_by_gpt2(required_field)
+
+ return num_tokens
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml
new file mode 100644
index 0000000000..87d977e26c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x22b-instruct
+label:
+ zh_Hans: Mixtral MoE 8x22B Instruct
+ en_US: Mixtral MoE 8x22B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 65536
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '1.2'
+ output: '1.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml
new file mode 100644
index 0000000000..e3d5a90858
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x7b-instruct-hf
+label:
+  zh_Hans: Mixtral MoE 8x7B Instruct (HF version)
+  en_US: Mixtral MoE 8x7B Instruct (HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml
new file mode 100644
index 0000000000..45f632ceff
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x7b-instruct
+label:
+ zh_Hans: Mixtral MoE 8x7B Instruct
+ en_US: Mixtral MoE 8x7B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml
new file mode 100644
index 0000000000..9c3486ba10
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mythomax-l2-13b
+label:
+ zh_Hans: MythoMax L2 13b
+ en_US: MythoMax L2 13b
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml
new file mode 100644
index 0000000000..e399f2edb1
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/phi-3-vision-128k-instruct
+label:
+ zh_Hans: Phi3.5 Vision Instruct
+ en_US: Phi3.5 Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml
new file mode 100644
index 0000000000..bb4b6f994e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml
@@ -0,0 +1,45 @@
+model: accounts/yi-01-ai/models/yi-large
+label:
+ zh_Hans: Yi-Large
+ en_US: Yi-Large
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '3'
+ output: '3'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
new file mode 100644
index 0000000000..bbc697e934
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -0,0 +1,39 @@
+model: gemini-1.5-flash-8b-exp-0827
+label:
+ en_US: Gemini 1.5 Flash 8B 0827
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
new file mode 100644
index 0000000000..c5695e5dda
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -0,0 +1,39 @@
+model: gemini-1.5-flash-exp-0827
+label:
+ en_US: Gemini 1.5 Flash 0827
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
new file mode 100644
index 0000000000..0a918e0d7b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -0,0 +1,39 @@
+model: gemini-1.5-pro-exp-0801
+label:
+ en_US: Gemini 1.5 Pro 0801
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
new file mode 100644
index 0000000000..7452ce46e7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -0,0 +1,39 @@
+model: gemini-1.5-pro-exp-0827
+label:
+ en_US: Gemini 1.5 Pro 0827
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index d65dc02674..b3e1ecf3af 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -9,7 +9,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 1048576
+ context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml
index ca8600a534..f494984443 100644
--- a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml
@@ -3,3 +3,4 @@
- hunyuan-standard-256k
- hunyuan-pro
- hunyuan-turbo
+- hunyuan-vision
diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml
new file mode 100644
index 0000000000..9edc7f4710
--- /dev/null
+++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml
@@ -0,0 +1,39 @@
+model: hunyuan-vision
+label:
+ zh_Hans: hunyuan-vision
+ en_US: hunyuan-vision
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+ - vision
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
+      en_US: Toggle for enhancements such as search. When disabled, replies are generated directly by the main model, which lowers response latency (most noticeably time to first token when streaming), though reply quality may drop in a few scenarios.
+ required: false
+ default: true
+pricing:
+ input: '0.018'
+ output: '0.018'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py
index b57e5e1c2b..2014de8516 100644
--- a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py
+++ b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py
@@ -1,6 +1,7 @@
import json
import logging
from collections.abc import Generator
+from typing import cast
from tencentcloud.common import credential
from tencentcloud.common.exception import TencentCloudSDKException
@@ -11,9 +12,12 @@ from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
+ ImagePromptMessageContent,
PromptMessage,
+ PromptMessageContentType,
PromptMessageTool,
SystemPromptMessage,
+ TextPromptMessageContent,
ToolPromptMessage,
UserPromptMessage,
)
@@ -143,6 +147,25 @@ class HunyuanLargeLanguageModel(LargeLanguageModel):
tool_execute_result = {"result": message.content}
content = json.dumps(tool_execute_result, ensure_ascii=False)
dict_list.append({"Role": message.role.value, "Content": content, "ToolCallId": message.tool_call_id})
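+            # Multimodal user messages: a plain string maps to "Content"; rich content becomes typed "Contents" entries (text / image_url).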
+ elif isinstance(message, UserPromptMessage):
+ message = cast(UserPromptMessage, message)
+ if isinstance(message.content, str):
+ dict_list.append({"Role": message.role.value, "Content": message.content})
+ else:
+ sub_messages = []
+ for message_content in message.content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ sub_message_dict = {"Type": "text", "Text": message_content.data}
+ sub_messages.append(sub_message_dict)
+ elif message_content.type == PromptMessageContentType.IMAGE:
+ message_content = cast(ImagePromptMessageContent, message_content)
+ sub_message_dict = {
+ "Type": "image_url",
+ "ImageUrl": {"Url": message_content.data},
+ }
+ sub_messages.append(sub_message_dict)
+ dict_list.append({"Role": message.role.value, "Contents": sub_messages})
else:
dict_list.append({"Role": message.role.value, "Content": message.content})
return dict_list
diff --git a/api/core/model_runtime/model_providers/jina/jina.yaml b/api/core/model_runtime/model_providers/jina/jina.yaml
index 23e18ad75f..2f2d6e6daa 100644
--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@@ -1,6 +1,6 @@
-provider: jina
+provider: Jina AI
label:
- en_US: Jina
+ en_US: Jina AI
description:
en_US: Embedding and Rerank Model Supported
icon_small:
@@ -11,7 +11,7 @@ background: "#EFFDFD"
help:
title:
en_US: Get your API key from Jina AI
- zh_Hans: 从 Jina 获取 API Key
+ zh_Hans: 从 Jina AI 获取 API Key
url:
en_US: https://jina.ai/
supported_model_types:
@@ -67,3 +67,46 @@ model_credential_schema:
required: false
type: text-input
default: '8192'
+ - variable: task
+ label:
+ zh_Hans: 下游任务
+ en_US: Downstream task
+ placeholder:
+ zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
+ en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
+ required: false
+ type: select
+ options:
+ - value: retrieval.query
+ label:
+ en_US: retrieval.query
+ - value: retrieval.passage
+ label:
+ en_US: retrieval.passage
+ - value: separation
+ label:
+ en_US: separation
+ - value: classification
+ label:
+ en_US: classification
+ - value: text-matching
+ label:
+ en_US: text-matching
+ - variable: dimensions
+ label:
+ zh_Hans: 输出维度
+ en_US: Output dimensions
+ placeholder:
+ zh_Hans: 输入您的输出维度
+ en_US: Enter output dimensions
+ required: false
+ type: text-input
+ - variable: late_chunking
+ label:
+ zh_Hans: 后期分块
+ en_US: Late chunking
+ placeholder:
+ zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
+ en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
+ required: false
+ type: switch
diff --git a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
index ceb79567d5..6c96699ea2 100644
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@@ -27,6 +27,38 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1"
+ def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict:
+ """
+        Build the request payload for the Jina embeddings API.
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return: request payload
+ """
+
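+        # jina-clip-v1 expects each input wrapped in an object ({"text": ...}); other models accept plain strings.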
+ def transform_jina_input_text(model, text):
+ if model == "jina-clip-v1":
+ return {"text": text}
+ return text
+
+ data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
+
+ task = credentials.get("task")
+ dimensions = credentials.get("dimensions")
+ late_chunking = credentials.get("late_chunking")
+
+ if task is not None:
+ data["task"] = task
+
+ if dimensions is not None:
+ data["dimensions"] = int(dimensions)
+
+ if late_chunking is not None:
+ data["late_chunking"] = late_chunking
+
+ return data
+
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
) -> TextEmbeddingResult:
@@ -49,15 +81,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
- def transform_jina_input_text(model, text):
- if model == "jina-clip-v1":
- return {"text": text}
- return text
-
- data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
-
- if model == "jina-embeddings-v3":
- data["task"] = "text-matching"
+ data = self._to_payload(model=model, texts=texts, credentials=credentials)
try:
response = post(url, headers=headers, data=dumps(data))
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml
index 751003d71e..bdb06b7fff 100644
--- a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml
@@ -1,3 +1,8 @@
+- pixtral-12b-2409
+- codestral-latest
+- mistral-embed
+- open-mistral-nemo
+- open-codestral-mamba
- open-mistral-7b
- open-mixtral-8x7b
- open-mixtral-8x22b
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml
new file mode 100644
index 0000000000..5f1260233f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml
@@ -0,0 +1,51 @@
+model: codestral-latest
+label:
+ zh_Hans: codestral-latest
+ en_US: codestral-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4096
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml
new file mode 100644
index 0000000000..d759103d08
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml
@@ -0,0 +1,51 @@
+model: mistral-embed
+label:
+ zh_Hans: mistral-embed
+ en_US: mistral-embed
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml
new file mode 100644
index 0000000000..d7ffb9ea02
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml
@@ -0,0 +1,51 @@
+model: open-codestral-mamba
+label:
+ zh_Hans: open-codestral-mamba
+ en_US: open-codestral-mamba
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 256000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 16384
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml
new file mode 100644
index 0000000000..dcda4fbce7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml
@@ -0,0 +1,51 @@
+model: open-mistral-nemo
+label:
+ zh_Hans: open-mistral-nemo
+ en_US: open-mistral-nemo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8192
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml
new file mode 100644
index 0000000000..0b002b49ca
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml
@@ -0,0 +1,51 @@
+model: pixtral-12b-2409
+label:
+ zh_Hans: pixtral-12b-2409
+ en_US: pixtral-12b-2409
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8192
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/__init__.py b/api/core/model_runtime/model_providers/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
new file mode 100644
index 0000000000..6c4a1058ab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
@@ -0,0 +1,13 @@
+
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png
new file mode 100644
index 0000000000..3eba3b82bc
Binary files /dev/null and b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png differ
diff --git a/api/core/model_runtime/model_providers/nomic/_common.py b/api/core/model_runtime/model_providers/nomic/_common.py
new file mode 100644
index 0000000000..406577dcd7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_common.py
@@ -0,0 +1,28 @@
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonNomic:
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [InvokeConnectionError],
+ InvokeServerUnavailableError: [InvokeServerUnavailableError],
+ InvokeRateLimitError: [InvokeRateLimitError],
+ InvokeAuthorizationError: [InvokeAuthorizationError],
+ InvokeBadRequestError: [KeyError, InvokeBadRequestError],
+ }
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.py b/api/core/model_runtime/model_providers/nomic/nomic.py
new file mode 100644
index 0000000000..d4e5da2e98
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.py
@@ -0,0 +1,26 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NomicAtlasProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
+ model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials)
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.yaml b/api/core/model_runtime/model_providers/nomic/nomic.yaml
new file mode 100644
index 0000000000..60dcf1facb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.yaml
@@ -0,0 +1,29 @@
+provider: nomic
+label:
+ zh_Hans: Nomic Atlas
+ en_US: Nomic Atlas
+icon_small:
+ en_US: icon_s_en.png
+icon_large:
+ en_US: icon_l_en.svg
+background: "#EFF1FE"
+help:
+ title:
+ en_US: Get your API key from Nomic Atlas
+ zh_Hans: 从Nomic Atlas获取 API Key
+ url:
+ en_US: https://atlas.nomic.ai/data
+supported_model_types:
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: nomic_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
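Note: the `variable: nomic_api_key` entry in `credential_form_schemas` defines the key under which the provider form stores the secret, and it is the same key the embedding code below reads from the `credentials` dict. A tiny illustration (value is a placeholder):

```python
# The key name comes from nomic.yaml's credential_form_schemas entry.
credentials = {"nomic_api_key": "nk-..."}  # placeholder value from the provider form
assert "nomic_api_key" in credentials       # the check performed before nomic.login()
```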
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..111452df57
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..ac59f106ed
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..6cccff6d46
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@@ -0,0 +1,170 @@
+import time
+from functools import wraps
+from typing import Optional
+
+from nomic import embed
+from nomic import login as nomic_login
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import (
+ EmbeddingUsage,
+ TextEmbeddingResult,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import (
+ TextEmbeddingModel,
+)
+from core.model_runtime.model_providers.nomic._common import _CommonNomic
+
+
+def nomic_login_required(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ if not kwargs.get("credentials"):
+ raise ValueError("missing credentials parameters")
+ credentials = kwargs.get("credentials")
+ if "nomic_api_key" not in credentials:
+ raise ValueError("missing nomic_api_key in credentials parameters")
+ # nomic login
+ nomic_login(credentials["nomic_api_key"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+ return func(*args, **kwargs)
+
+ return wrapper
+
+
+class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
+ """
+ Model class for nomic text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :return: embeddings result
+ """
+ embeddings, prompt_tokens, total_tokens = self.embed_text(
+ model=model,
+ credentials=credentials,
+ texts=texts,
+ )
+
+ # calc usage
+ usage = self._calc_response_usage(
+ model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens
+ )
+ return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ if len(texts) == 0:
+ return 0
+
+ _, prompt_tokens, _ = self.embed_text(
+ model=model,
+ credentials=credentials,
+ texts=texts,
+ )
+ return prompt_tokens
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # call embedding model
+ self.embed_text(model=model, credentials=credentials, texts=["ping"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @nomic_login_required
+ def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]:
+ """Call out to Nomic's embedding endpoint.
+
+ Args:
+            model: The model to use for embedding.
+            credentials: The model credentials; must include nomic_api_key.
+            texts: The list of texts to embed.
+
+ Returns:
+ List of embeddings, one for each text, and tokens usage.
+ """
+ embeddings: list[list[float]] = []
+ prompt_tokens = 0
+ total_tokens = 0
+
+ response = embed.text(
+ model=model,
+ texts=texts,
+ )
+
+ if not (response and "embeddings" in response):
+ raise ValueError("Embedding data is missing in the response.")
+
+ if not (response and "usage" in response):
+ raise ValueError("Response usage is missing.")
+
+ if "prompt_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain prompt tokens.")
+
+ if "total_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain total tokens.")
+
+ embeddings = [list(map(float, e)) for e in response["embeddings"]]
+ total_tokens = response["usage"]["total_tokens"]
+ prompt_tokens = response["usage"]["prompt_tokens"]
+ return embeddings, prompt_tokens, total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: prompt tokens
+ :param total_tokens: total tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens,
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=total_tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
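Note: `@nomic_login_required` gates every embedding call on the presence of `nomic_api_key` and logs into the Nomic SDK before the wrapped method runs. A small illustration on a stand-alone function (hypothetical, not part of the diff; only the first call is safe to run offline):

```python
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import (
    nomic_login_required,
)

# Illustrative probe function, not part of the provider code.
@nomic_login_required
def embed_probe(*, credentials: dict, texts: list[str]) -> str:
    return f"would embed {len(texts)} text(s)"

try:
    embed_probe(credentials={}, texts=["ping"])  # rejected before any login attempt
except CredentialsValidateFailedError as e:
    print("rejected:", e)

# With a real key the decorator calls nomic.login(...) first, then the function:
# embed_probe(credentials={"nomic_api_key": "nk-..."}, texts=["ping"])
```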
diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index 1ed77a2ee8..ff732e6925 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -472,12 +472,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule(
name=DefaultParameterName.TEMPERATURE.value,
use_template=DefaultParameterName.TEMPERATURE.value,
- label=I18nObject(en_US="Temperature"),
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
type=ParameterType.FLOAT,
help=I18nObject(
en_US="The temperature of the model. "
"Increasing the temperature will make the model answer "
- "more creatively. (Default: 0.8)"
+ "more creatively. (Default: 0.8)",
+ zh_Hans="模型的温度。增加温度将使模型的回答更具创造性。(默认值:0.8)",
),
default=0.1,
min=0,
@@ -486,12 +487,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule(
name=DefaultParameterName.TOP_P.value,
use_template=DefaultParameterName.TOP_P.value,
- label=I18nObject(en_US="Top P"),
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
type=ParameterType.FLOAT,
help=I18nObject(
en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to "
"more diverse text, while a lower value (e.g., 0.5) will generate more "
- "focused and conservative text. (Default: 0.9)"
+ "focused and conservative text. (Default: 0.9)",
+ zh_Hans="与top-k一起工作。较高的值(例如,0.95)会导致生成更多样化的文本,而较低的值(例如,0.5)会生成更专注和保守的文本。(默认值:0.9)",
),
default=0.9,
min=0,
@@ -499,12 +501,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
),
ParameterRule(
name="top_k",
- label=I18nObject(en_US="Top K"),
+ label=I18nObject(en_US="Top K", zh_Hans="Top K"),
type=ParameterType.INT,
help=I18nObject(
en_US="Reduces the probability of generating nonsense. "
"A higher value (e.g. 100) will give more diverse answers, "
- "while a lower value (e.g. 10) will be more conservative. (Default: 40)"
+ "while a lower value (e.g. 10) will be more conservative. (Default: 40)",
+ zh_Hans="减少生成无意义内容的可能性。较高的值(例如100)将提供更多样化的答案,而较低的值(例如10)将更为保守。(默认值:40)",
),
min=1,
max=100,
@@ -516,7 +519,8 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
help=I18nObject(
en_US="Sets how strongly to penalize repetitions. "
"A higher value (e.g., 1.5) will penalize repetitions more strongly, "
- "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)"
+ "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
+ zh_Hans="设置对重复内容的惩罚强度。一个较高的值(例如,1.5)会更强地惩罚重复内容,而一个较低的值(例如,0.9)则会相对宽容。(默认值:1.1)",
),
min=-2,
max=2,
@@ -524,11 +528,12 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule(
name="num_predict",
use_template="max_tokens",
- label=I18nObject(en_US="Num Predict"),
+ label=I18nObject(en_US="Num Predict", zh_Hans="最大令牌数预测"),
type=ParameterType.INT,
help=I18nObject(
en_US="Maximum number of tokens to predict when generating text. "
- "(Default: 128, -1 = infinite generation, -2 = fill context)"
+ "(Default: 128, -1 = infinite generation, -2 = fill context)",
+ zh_Hans="生成文本时预测的最大令牌数。(默认值:128,-1 = 无限生成,-2 = 填充上下文)",
),
default=(512 if int(credentials.get("max_tokens", 4096)) >= 768 else 128),
min=-2,
@@ -536,121 +541,137 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
),
ParameterRule(
name="mirostat",
- label=I18nObject(en_US="Mirostat sampling"),
+ label=I18nObject(en_US="Mirostat sampling", zh_Hans="Mirostat 采样"),
type=ParameterType.INT,
help=I18nObject(
en_US="Enable Mirostat sampling for controlling perplexity. "
- "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"
+ "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)",
+ zh_Hans="启用 Mirostat 采样以控制困惑度。"
+ "(默认值:0,0 = 禁用,1 = Mirostat,2 = Mirostat 2.0)",
),
min=0,
max=2,
),
ParameterRule(
name="mirostat_eta",
- label=I18nObject(en_US="Mirostat Eta"),
+ label=I18nObject(en_US="Mirostat Eta", zh_Hans="学习率"),
type=ParameterType.FLOAT,
help=I18nObject(
en_US="Influences how quickly the algorithm responds to feedback from "
"the generated text. A lower learning rate will result in slower adjustments, "
"while a higher learning rate will make the algorithm more responsive. "
- "(Default: 0.1)"
+ "(Default: 0.1)",
+ zh_Hans="影响算法对生成文本反馈响应的速度。较低的学习率会导致调整速度变慢,而较高的学习率会使得算法更加灵敏。(默认值:0.1)",
),
precision=1,
),
ParameterRule(
name="mirostat_tau",
- label=I18nObject(en_US="Mirostat Tau"),
+ label=I18nObject(en_US="Mirostat Tau", zh_Hans="文本连贯度"),
type=ParameterType.FLOAT,
help=I18nObject(
en_US="Controls the balance between coherence and diversity of the output. "
- "A lower value will result in more focused and coherent text. (Default: 5.0)"
+ "A lower value will result in more focused and coherent text. (Default: 5.0)",
+ zh_Hans="控制输出的连贯性和多样性之间的平衡。较低的值会导致更专注和连贯的文本。(默认值:5.0)",
),
precision=1,
),
ParameterRule(
name="num_ctx",
- label=I18nObject(en_US="Size of context window"),
+ label=I18nObject(en_US="Size of context window", zh_Hans="上下文窗口大小"),
type=ParameterType.INT,
help=I18nObject(
- en_US="Sets the size of the context window used to generate the next token. (Default: 2048)"
+ en_US="Sets the size of the context window used to generate the next token. (Default: 2048)",
+ zh_Hans="设置用于生成下一个标记的上下文窗口大小。(默认值:2048)",
),
default=2048,
min=1,
),
ParameterRule(
name="num_gpu",
- label=I18nObject(en_US="GPU Layers"),
+ label=I18nObject(en_US="GPU Layers", zh_Hans="GPU 层数"),
type=ParameterType.INT,
help=I18nObject(
en_US="The number of layers to offload to the GPU(s). "
"On macOS it defaults to 1 to enable metal support, 0 to disable."
"As long as a model fits into one gpu it stays in one. "
- "It does not set the number of GPU(s). "
+ "It does not set the number of GPU(s). ",
+ zh_Hans="加载到 GPU 的层数。在 macOS 上,默认为 1 以启用 Metal 支持,设置为 0 则禁用。"
+ "只要模型适合一个 GPU,它就保留在其中。它不设置 GPU 的数量。",
),
min=-1,
default=1,
),
ParameterRule(
name="num_thread",
- label=I18nObject(en_US="Num Thread"),
+ label=I18nObject(en_US="Num Thread", zh_Hans="线程数"),
type=ParameterType.INT,
help=I18nObject(
en_US="Sets the number of threads to use during computation. "
"By default, Ollama will detect this for optimal performance. "
"It is recommended to set this value to the number of physical CPU cores "
- "your system has (as opposed to the logical number of cores)."
+ "your system has (as opposed to the logical number of cores).",
+ zh_Hans="设置计算过程中使用的线程数。默认情况下,Ollama会检测以获得最佳性能。建议将此值设置为系统拥有的物理CPU核心数(而不是逻辑核心数)。",
),
min=1,
),
ParameterRule(
name="repeat_last_n",
- label=I18nObject(en_US="Repeat last N"),
+ label=I18nObject(en_US="Repeat last N", zh_Hans="回溯内容"),
type=ParameterType.INT,
help=I18nObject(
en_US="Sets how far back for the model to look back to prevent repetition. "
- "(Default: 64, 0 = disabled, -1 = num_ctx)"
+ "(Default: 64, 0 = disabled, -1 = num_ctx)",
+ zh_Hans="设置模型回溯多远的内容以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)",
),
min=-1,
),
ParameterRule(
name="tfs_z",
- label=I18nObject(en_US="TFS Z"),
+ label=I18nObject(en_US="TFS Z", zh_Hans="减少标记影响"),
type=ParameterType.FLOAT,
help=I18nObject(
en_US="Tail free sampling is used to reduce the impact of less probable tokens "
"from the output. A higher value (e.g., 2.0) will reduce the impact more, "
- "while a value of 1.0 disables this setting. (default: 1)"
+ "while a value of 1.0 disables this setting. (default: 1)",
+ zh_Hans="用于减少输出中不太可能的标记的影响。较高的值(例如,2.0)会更多地减少这种影响,而1.0的值则会禁用此设置。(默认值:1)",
),
precision=1,
),
ParameterRule(
name="seed",
- label=I18nObject(en_US="Seed"),
+ label=I18nObject(en_US="Seed", zh_Hans="随机数种子"),
type=ParameterType.INT,
help=I18nObject(
en_US="Sets the random number seed to use for generation. Setting this to "
"a specific number will make the model generate the same text for "
- "the same prompt. (Default: 0)"
+ "the same prompt. (Default: 0)",
+ zh_Hans="设置用于生成的随机数种子。将此设置为特定数字将使模型对相同的提示生成相同的文本。(默认值:0)",
),
),
ParameterRule(
name="keep_alive",
- label=I18nObject(en_US="Keep Alive"),
+ label=I18nObject(en_US="Keep Alive", zh_Hans="模型存活时间"),
type=ParameterType.STRING,
help=I18nObject(
en_US="Sets how long the model is kept in memory after generating a response. "
"This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours)."
" A negative number keeps the model loaded indefinitely, and '0' unloads the model"
" immediately after generating a response."
- " Valid time units are 's','m','h'. (Default: 5m)"
+ " Valid time units are 's','m','h'. (Default: 5m)",
+ zh_Hans="设置模型在生成响应后在内存中保留的时间。"
+ "这必须是一个带有单位的持续时间字符串(例如,'10m' 表示10分钟,'24h' 表示24小时)。"
+ "负数表示无限期地保留模型,'0'表示在生成响应后立即卸载模型。"
+ "有效的时间单位有 's'(秒)、'm'(分钟)、'h'(小时)。(默认值:5m)",
),
),
ParameterRule(
name="format",
- label=I18nObject(en_US="Format"),
+ label=I18nObject(en_US="Format", zh_Hans="返回格式"),
type=ParameterType.STRING,
help=I18nObject(
- en_US="the format to return a response in. Currently the only accepted value is json."
+ en_US="the format to return a response in. Currently the only accepted value is json.",
+ zh_Hans="返回响应的格式。目前唯一接受的值是json。",
),
options=["json"],
),
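Note: the Ollama changes above only fill in the `zh_Hans` side of each `I18nObject`, so parameter labels and help now render in Simplified Chinese as well as English. The pattern in isolation (import path is my assumption):

```python
from core.model_runtime.entities.common_entities import I18nObject  # assumed import path

label = I18nObject(en_US="Top K", zh_Hans="Top K")
help_text = I18nObject(
    en_US="Reduces the probability of generating nonsense. (Default: 40)",
    zh_Hans="减少生成无意义内容的可能性。(默认值:40)",
)
```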
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
index 5a8a754f72..c2ffe653c8 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
@@ -205,7 +205,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
parameter_rules=[
ParameterRule(
name=DefaultParameterName.TEMPERATURE.value,
- label=I18nObject(en_US="Temperature"),
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+ help=I18nObject(
+ en_US="Kernel sampling threshold. Used to determine the randomness of the results."
+ "The higher the value, the stronger the randomness."
+ "The higher the possibility of getting different answers to the same question.",
+ zh_Hans="核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。",
+ ),
type=ParameterType.FLOAT,
default=float(credentials.get("temperature", 0.7)),
min=0,
@@ -214,7 +220,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
),
ParameterRule(
name=DefaultParameterName.TOP_P.value,
- label=I18nObject(en_US="Top P"),
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+ help=I18nObject(
+ en_US="The probability threshold of the nucleus sampling method during the generation process."
+ "The larger the value is, the higher the randomness of generation will be."
+ "The smaller the value is, the higher the certainty of generation will be.",
+ zh_Hans="生成过程中核采样方法概率阈值。取值越大,生成的随机性越高;取值越小,生成的确定性越高。",
+ ),
type=ParameterType.FLOAT,
default=float(credentials.get("top_p", 1)),
min=0,
@@ -223,7 +235,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
),
ParameterRule(
name=DefaultParameterName.FREQUENCY_PENALTY.value,
- label=I18nObject(en_US="Frequency Penalty"),
+ label=I18nObject(en_US="Frequency Penalty", zh_Hans="频率惩罚"),
+ help=I18nObject(
+ en_US="For controlling the repetition rate of words used by the model."
+ "Increasing this can reduce the repetition of the same words in the model's output.",
+ zh_Hans="用于控制模型已使用字词的重复率。 提高此项可以降低模型在输出中重复相同字词的重复度。",
+ ),
type=ParameterType.FLOAT,
default=float(credentials.get("frequency_penalty", 0)),
min=-2,
@@ -231,7 +248,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
),
ParameterRule(
name=DefaultParameterName.PRESENCE_PENALTY.value,
- label=I18nObject(en_US="Presence Penalty"),
+ label=I18nObject(en_US="Presence Penalty", zh_Hans="存在惩罚"),
+ help=I18nObject(
+ en_US="Used to control the repetition rate when generating models."
+ "Increasing this can reduce the repetition rate of model generation.",
+ zh_Hans="用于控制模型生成时的重复度。提高此项可以降低模型生成的重复度。",
+ ),
type=ParameterType.FLOAT,
default=float(credentials.get("presence_penalty", 0)),
min=-2,
@@ -239,7 +261,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
),
ParameterRule(
name=DefaultParameterName.MAX_TOKENS.value,
- label=I18nObject(en_US="Max Tokens"),
+ label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+ help=I18nObject(
+ en_US="Maximum length of tokens for the model response.", zh_Hans="模型回答的tokens的最大长度。"
+ ),
type=ParameterType.INT,
default=512,
min=1,
diff --git a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
index 7e00dd3f4b..d9497b76b8 100644
--- a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml
@@ -1,3 +1,5 @@
+- openai/o1-preview
+- openai/o1-mini
- openai/gpt-4o
- openai/gpt-4o-mini
- openai/gpt-4
diff --git a/api/core/model_runtime/model_providers/openrouter/llm/llm.py b/api/core/model_runtime/model_providers/openrouter/llm/llm.py
index b6bb249a04..736ab8e7a8 100644
--- a/api/core/model_runtime/model_providers/openrouter/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llm.py
@@ -1,7 +1,7 @@
from collections.abc import Generator
from typing import Optional, Union
-from core.model_runtime.entities.llm_entities import LLMResult
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
@@ -26,7 +26,7 @@ class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel):
) -> Union[LLMResult, Generator]:
self._update_credential(model, credentials)
- return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+ return self._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:
self._update_credential(model, credentials)
@@ -46,7 +46,48 @@ class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel):
) -> Union[LLMResult, Generator]:
self._update_credential(model, credentials)
- return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+ block_as_stream = False
+ if model.startswith("openai/o1"):
+ block_as_stream = True
+ stop = None
+
+ # invoke block as stream
+ if stream and block_as_stream:
+ return self._generate_block_as_stream(
+ model, credentials, prompt_messages, model_parameters, tools, stop, user
+ )
+ else:
+ return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
+ def _generate_block_as_stream(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ user: Optional[str] = None,
+ ) -> Generator:
+ resp: LLMResult = super()._generate(
+ model, credentials, prompt_messages, model_parameters, tools, stop, False, user
+ )
+
+ yield LLMResultChunk(
+ model=model,
+ prompt_messages=prompt_messages,
+ delta=LLMResultChunkDelta(
+ index=0,
+ message=resp.message,
+ usage=self._calc_response_usage(
+ model=model,
+ credentials=credentials,
+ prompt_tokens=resp.usage.prompt_tokens,
+ completion_tokens=resp.usage.completion_tokens,
+ ),
+ finish_reason="stop",
+ ),
+ )
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
self._update_credential(model, credentials)
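Note: the `openai/o1-*` models are handled as blocking-only here: `stop` is cleared and, when streaming is requested, the override performs one non-streaming request and re-emits the full result as a single stream chunk with `finish_reason="stop"`. The idea, stripped of Dify's classes (`generate_blocking` is a hypothetical non-streaming call):

```python
from collections.abc import Generator

def block_as_stream(prompt: str) -> Generator[dict, None, None]:
    text, usage = generate_blocking(prompt)  # hypothetical single blocking request
    yield {                                  # re-emit the whole answer as one chunk
        "index": 0,
        "delta": text,
        "usage": usage,
        "finish_reason": "stop",
    }
```

Callers that iterate over a streaming generator keep working unchanged; they simply receive one chunk.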
diff --git a/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml b/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml
new file mode 100644
index 0000000000..85a918ff5e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml
@@ -0,0 +1,40 @@
+model: openai/o1-mini
+label:
+ en_US: o1-mini
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 65536
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: response_format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "3.00"
+ output: "12.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml b/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml
new file mode 100644
index 0000000000..74b0a511be
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml
@@ -0,0 +1,40 @@
+model: openai/o1-preview
+label:
+ en_US: o1-preview
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 32768
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: response_format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "15.00"
+ output: "60.00"
+ unit: "0.000001"
+ currency: USD
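Note on the pricing block: reading these fields as a price per `unit` tokens, o1-preview input at "15.00" with unit "0.000001" works out to 15.00 USD per 1,000,000 input tokens (my reading of the fields; the exact formula lives in the shared pricing code). A quick check:

```python
from decimal import Decimal

input_price = Decimal("15.00")   # USD, from o1-preview.yaml
unit = Decimal("0.000001")       # quoted price applies per 1 / 0.000001 tokens
prompt_tokens = 2_000

cost = prompt_tokens * unit * input_price
print(cost)  # 0.03000000 -> about $0.03 for 2,000 input tokens
```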
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
index 87712874b9..bf91468fcf 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
index f16f3de60b..781b837e8e 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
index 21267c240b..67210e9020 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
index 80c7ec40f2..482632ff06 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
index 841dd97f35..ddb6fd977c 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
index 33d5d12b22..024c79dbcf 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
new file mode 100644
index 0000000000..94f661f40d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
@@ -0,0 +1,61 @@
+model: Qwen2-72B-Instruct-AWQ-int4
+label:
+ en_US: Qwen2-72B-Instruct-AWQ-int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
index 62255cc7d2..a06f8d5ab1 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
@@ -61,3 +61,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
new file mode 100644
index 0000000000..4369411399
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
@@ -0,0 +1,63 @@
+model: Qwen2-7B-Instruct
+label:
+ en_US: Qwen2-7B-Instruct
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: completion
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
index 2f3f1f0225..d549ecd227 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
@@ -61,3 +61,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
new file mode 100644
index 0000000000..15cbf01f1f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-72B-Instruct
+label:
+ en_US: Qwen2.5-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 30720
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
new file mode 100644
index 0000000000..dadc8f8f32
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-7B-Instruct
+label:
+ en_US: Qwen2.5-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
new file mode 100644
index 0000000000..649be20b48
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
@@ -0,0 +1,61 @@
+model: Reflection-Llama-3.1-70B
+label:
+ en_US: Reflection-Llama-3.1-70B
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 10240
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
new file mode 100644
index 0000000000..92eae6804f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
@@ -0,0 +1,61 @@
+model: Yi-1_5-9B-Chat-16K
+label:
+ en_US: Yi-1_5-9B-Chat-16K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
new file mode 100644
index 0000000000..0e21ce148c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-1.5B-Chat
+label:
+ en_US: Yi-Coder-1.5B-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 20480
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
new file mode 100644
index 0000000000..23b0841ce4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-9B-Chat
+label:
+ en_US: Yi-Coder-9B-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 20480
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the model's output. Increasing repetition_penalty can reduce the repetition of generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
index 2c9eac0e49..37bf400f1e 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
@@ -1,15 +1,24 @@
-- Meta-Llama-3.1-405B-Instruct-AWQ-INT4
-- Meta-Llama-3.1-8B-Instruct
-- Meta-Llama-3-70B-Instruct-GPTQ-Int4
-- Meta-Llama-3-8B-Instruct
-- Qwen2-72B-Instruct-GPTQ-Int4
+- Qwen2.5-72B-Instruct
+- Qwen2.5-7B-Instruct
+- Yi-Coder-1.5B-Chat
+- Yi-Coder-9B-Chat
+- Qwen2-72B-Instruct-AWQ-int4
+- Yi-1_5-9B-Chat-16K
+- Qwen2-7B-Instruct
+- Reflection-Llama-3.1-70B
- Qwen2-72B-Instruct
+- Meta-Llama-3.1-8B-Instruct
+
+- Meta-Llama-3.1-405B-Instruct-AWQ-INT4
+- Meta-Llama-3-70B-Instruct-GPTQ-Int4
+- chatglm3-6b
+- Meta-Llama-3-8B-Instruct
+- Llama3-Chinese_v2
+- deepseek-v2-lite-chat
+- Qwen2-72B-Instruct-GPTQ-Int4
- Qwen2-7B
- Qwen-14B-Chat-Int4
- Qwen1.5-72B-Chat-GPTQ-Int4
- Qwen1.5-7B
- Qwen1.5-110B-Chat-GPTQ-Int4
- deepseek-v2-chat
-- deepseek-v2-lite-chat
-- Llama3-Chinese_v2
-- chatglm3-6b
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
index f9c26b7f90..75d80f784a 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
index 078922ef95..fa9a7b7175 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
index 4ff3af7b51..75a26d2505 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
@@ -59,3 +59,4 @@ pricing:
output: "0.000"
unit: "0.000"
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
index 450d22fb75..9a4ead031d 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
@@ -1,7 +1,5 @@
import logging
-from core.model_runtime.entities.model_entities import ModelType
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
@@ -9,20 +7,4 @@ logger = logging.getLogger(__name__)
class PerfXCloudProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
- """
- Validate provider credentials
- if validate failed, raise exception
-
- :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
- """
- try:
- model_instance = self.get_model_instance(ModelType.LLM)
-
- # Use `Qwen2_72B_Chat_GPTQ_Int4` model for validate,
- # no matter what model you pass in, text completion model or chat model
- model_instance.validate_credentials(model="Qwen2-72B-Instruct-GPTQ-Int4", credentials=credentials)
- except CredentialsValidateFailedError as ex:
- raise ex
- except Exception as ex:
- logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
- raise ex
+ pass
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
new file mode 100644
index 0000000000..03db0d8bce
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
@@ -0,0 +1,4 @@
+model: gte-Qwen2-7B-instruct
+model_type: text-embedding
+model_properties:
+ context_size: 2048
diff --git a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
index 2edd13d56d..04789197ee 100644
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@@ -85,7 +85,6 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
"""
sagemaker_client: Any = None
- sagemaker_sess: Any = None
predictor: Any = None
def _handle_chat_generate_response(
@@ -213,23 +212,22 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:return: full response or stream response chunk generator result
"""
if not self.sagemaker_client:
- access_key = credentials.get("access_key")
- secret_key = credentials.get("secret_key")
+ access_key = credentials.get("aws_access_key_id")
+ secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region")
+ boto_session = None
if aws_region:
if access_key and secret_key:
- self.sagemaker_client = boto3.client(
- "sagemaker-runtime",
- aws_access_key_id=access_key,
- aws_secret_access_key=secret_key,
- region_name=aws_region,
+ boto_session = boto3.Session(
+ aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=aws_region
)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region)
+ boto_session = boto3.Session(region_name=aws_region)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime")
+ boto_session = boto3.Session()
- sagemaker_session = Session(sagemaker_runtime_client=self.sagemaker_client)
+ self.sagemaker_client = boto_session.client("sagemaker")
+ sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client)
self.predictor = Predictor(
endpoint_name=credentials.get("sagemaker_endpoint"),
sagemaker_session=sagemaker_session,
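The hunk above routes both the SageMaker client and the `sagemaker` Session through a single `boto3.Session` built from optional static credentials. A minimal standalone sketch of that construction, assuming the same credential keys (`aws_access_key_id`, `aws_secret_access_key`, `aws_region`, `sagemaker_endpoint`) and an installed `sagemaker` SDK:

```python
# Sketch only: mirrors the credential handling in the hunk above.
import boto3
from sagemaker.predictor import Predictor
from sagemaker.session import Session


def build_predictor(credentials: dict) -> Predictor:
    access_key = credentials.get("aws_access_key_id")
    secret_key = credentials.get("aws_secret_access_key")
    aws_region = credentials.get("aws_region")

    if aws_region and access_key and secret_key:
        # Explicit static credentials pinned to a region.
        boto_session = boto3.Session(
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=aws_region,
        )
    elif aws_region:
        # Region pinned; credentials come from the default provider chain.
        boto_session = boto3.Session(region_name=aws_region)
    else:
        # Region and credentials both come from the default provider chain.
        boto_session = boto3.Session()

    sagemaker_client = boto_session.client("sagemaker")
    sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
    return Predictor(
        endpoint_name=credentials.get("sagemaker_endpoint"),
        sagemaker_session=sagemaker_session,
    )
```

In the model class itself this only runs on the first invocation, since `self.sagemaker_client` is checked before the session is rebuilt.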
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index c2f0eb0536..43db4aed11 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,3 +1,7 @@
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-72B-Instruct
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
@@ -6,6 +10,7 @@
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
- internlm/internlm2_5-7b-chat
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
new file mode 100644
index 0000000000..1c8e15ae52
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
@@ -0,0 +1,30 @@
+model: deepseek-ai/DeepSeek-V2.5
+label:
+ en_US: deepseek-ai/DeepSeek-V2.5
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.33'
+ output: '1.33'
+ unit: '0.000001'
+ currency: RMB
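The new model definition follows the same shape as the other provider YAMLs in this diff (label, model_properties, parameter_rules, pricing). As an illustration only (not Dify's actual loader), a small sketch that reads such a file with PyYAML and sanity-checks each parameter rule's default against its bounds:

```python
# Illustrative sketch, not the project's loader: parse a model definition
# like the one above and check defaults against their min/max bounds.
import yaml


def check_parameter_rules(path: str) -> None:
    with open(path, encoding="utf-8") as f:
        spec = yaml.safe_load(f)

    props = spec.get("model_properties", {})
    print(f"{spec['model']} ({spec['model_type']}), context={props.get('context_size')}")
    for rule in spec.get("parameter_rules", []):
        lo, hi, default = rule.get("min"), rule.get("max"), rule.get("default")
        if None not in (lo, hi, default):
            assert lo <= default <= hi, f"{rule['name']}: default {default} outside [{lo}, {hi}]"
        print(f"  {rule['name']}: default={default}, range=[{lo}, {hi}]")


# Hypothetical usage, path relative to the repo root:
# check_parameter_rules("api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml")
```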
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
new file mode 100644
index 0000000000..02a401464b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-14B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-14B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.7'
+ output: '0.7'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
new file mode 100644
index 0000000000..d084617e7d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-32B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-32B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
new file mode 100644
index 0000000000..dfbad2494c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-72B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
new file mode 100644
index 0000000000..cdc8ffc4d2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-7B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/spark/llm/_client.py b/api/core/model_runtime/model_providers/spark/llm/_client.py
index b99a657e71..48911f657a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_client.py
+++ b/api/core/model_runtime/model_providers/spark/llm/_client.py
@@ -25,6 +25,7 @@ class SparkLLMClient:
"spark-pro": {"version": "v3.1", "chat_domain": "generalv3"},
"spark-pro-128k": {"version": "pro-128k", "chat_domain": "pro-128k"},
"spark-max": {"version": "v3.5", "chat_domain": "generalv3.5"},
+ "spark-max-32k": {"version": "max-32k", "chat_domain": "max-32k"},
"spark-4.0-ultra": {"version": "v4.0", "chat_domain": "4.0Ultra"},
}
@@ -32,7 +33,7 @@ class SparkLLMClient:
self.chat_domain = model_api_configs[model]["chat_domain"]
- if model == "spark-pro-128k":
+ if model in ["spark-pro-128k", "spark-max-32k"]:
self.api_base = f"wss://{domain}/{endpoint}/{api_version}"
else:
self.api_base = f"wss://{domain}/{api_version}/{endpoint}"
diff --git a/api/core/model_runtime/model_providers/spark/llm/_position.yaml b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
index 458397f2aa..73f39cb119 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
@@ -1,3 +1,4 @@
+- spark-max-32k
- spark-4.0-ultra
- spark-max
- spark-pro-128k
diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
new file mode 100644
index 0000000000..1a1ab6844c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
@@ -0,0 +1,33 @@
+model: spark-max-32k
+label:
+ en_US: Spark Max-32K
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Nucleus sampling threshold. Determines the randomness of the results; the higher the value, the more random the output, i.e. the more likely the same question yields different answers.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum number of tokens in the model's response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
new file mode 100644
index 0000000000..8ce336d60c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@@ -0,0 +1,51 @@
+- qwen-vl-max-0809
+- qwen-vl-max-0201
+- qwen-vl-max
+- qwen-max-latest
+- qwen-max-1201
+- qwen-max-0919
+- qwen-max-0428
+- qwen-max-0403
+- qwen-max-0107
+- qwen-max
+- qwen-max-longcontext
+- qwen-plus-latest
+- qwen-plus-0919
+- qwen-plus-0806
+- qwen-plus-0723
+- qwen-plus-0624
+- qwen-plus-0206
+- qwen-plus-chat
+- qwen-plus
+- qwen-vl-plus-0809
+- qwen-vl-plus-0201
+- qwen-vl-plus
+- qwen-turbo-latest
+- qwen-turbo-0919
+- qwen-turbo-0624
+- qwen-turbo-0206
+- qwen-turbo-chat
+- qwen-turbo
+- qwen2.5-72b-instruct
+- qwen2.5-32b-instruct
+- qwen2.5-14b-instruct
+- qwen2.5-7b-instruct
+- qwen2.5-3b-instruct
+- qwen2.5-1.5b-instruct
+- qwen2.5-0.5b-instruct
+- qwen2.5-coder-7b-instruct
+- qwen2-math-72b-instruct
+- qwen2-math-7b-instruct
+- qwen2-math-1.5b-instruct
+- qwen-long
+- qwen-math-plus-latest
+- qwen-math-plus-0919
+- qwen-math-plus-0816
+- qwen-math-plus
+- qwen-math-turbo-latest
+- qwen-math-turbo-0919
+- qwen-math-turbo
+- qwen-coder-turbo-latest
+- qwen-coder-turbo-0919
+- qwen-coder-turbo
+- farui-plus
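A `_position.yaml` file such as the one above only fixes display order for a provider's models; it carries no model metadata. A hedged sketch (not the actual Dify loader) of how a list like this can be applied, with unlisted names kept after the listed ones:

```python
# Illustrative only: order model names by their index in a _position.yaml list.
import yaml


def sort_by_position(position_yaml: str, model_names: list[str]) -> list[str]:
    with open(position_yaml, encoding="utf-8") as f:
        order = yaml.safe_load(f) or []
    rank = {name: i for i, name in enumerate(order)}
    # sorted() is stable, so unlisted models keep their input order at the end.
    return sorted(model_names, key=lambda name: rank.get(name, len(rank)))
```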
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
index aad07f5673..e5de586c1c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index 1d4eba6668..f90c7f075f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -30,7 +30,15 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
-from core.model_runtime.entities.model_entities import ModelFeature
+from core.model_runtime.entities.model_entities import (
+ AIModelEntity,
+ FetchFrom,
+ I18nObject,
+ ModelFeature,
+ ModelType,
+ ParameterRule,
+ ParameterType,
+)
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
@@ -520,3 +528,64 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
UnsupportedHTTPMethod,
],
}
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
+ """
+ Architecture for defining customizable models
+
+ :param model: model name
+ :param credentials: model credentials
+ :return: AIModelEntity or None
+ """
+ rules = [
+ ParameterRule(
+ name="temperature",
+ type=ParameterType.FLOAT,
+ use_template="temperature",
+ label=I18nObject(zh_Hans="温度", en_US="Temperature"),
+ ),
+ ParameterRule(
+ name="top_p",
+ type=ParameterType.FLOAT,
+ use_template="top_p",
+ label=I18nObject(zh_Hans="Top P", en_US="Top P"),
+ ),
+ ParameterRule(
+ name="top_k",
+ type=ParameterType.INT,
+ min=0,
+ max=99,
+ label=I18nObject(zh_Hans="top_k", en_US="top_k"),
+ ),
+ ParameterRule(
+ name="max_tokens",
+ type=ParameterType.INT,
+ min=1,
+ max=128000,
+ default=1024,
+ label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
+ ),
+ ParameterRule(
+ name="seed",
+ type=ParameterType.INT,
+ default=1234,
+ label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
+ ),
+ ParameterRule(
+ name="repetition_penalty",
+ type=ParameterType.FLOAT,
+ default=1.1,
+ label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
+ ),
+ ]
+
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(en_US=model),
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_type=ModelType.LLM,
+ model_properties={},
+ parameter_rules=rules,
+ )
+
+ return entity
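The new `get_customizable_model_schema` defines a generic, fixed parameter set for user-added Tongyi models instead of relying on a bundled YAML. A hedged usage sketch; the model name and credential key below are placeholders, not values taken from this diff:

```python
# Usage sketch: list the tunable parameters exposed by the schema above.
from core.model_runtime.model_providers.tongyi.llm.llm import TongyiLargeLanguageModel

llm = TongyiLargeLanguageModel()
schema = llm.get_customizable_model_schema(
    model="my-custom-qwen",                       # placeholder model name
    credentials={"dashscope_api_key": "sk-..."},  # assumed credential key
)
if schema is not None:
    for rule in schema.parameter_rules:
        print(rule.name, rule.type, rule.min, rule.max, rule.default)
```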
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
new file mode 100644
index 0000000000..6ab39cde2d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -0,0 +1,83 @@
+model: qwen-coder-turbo-0919
+label:
+ en_US: qwen-coder-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
new file mode 100644
index 0000000000..be6d9a0e07
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -0,0 +1,83 @@
+model: qwen-coder-turbo-latest
+label:
+ en_US: qwen-coder-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
new file mode 100644
index 0000000000..d2aca4f514
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -0,0 +1,83 @@
+model: qwen-coder-turbo
+label:
+ en_US: qwen-coder-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
index 33b3435eb6..a59a3350f6 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@@ -1,3 +1,4 @@
+# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
model: qwen-long
label:
en_US: qwen-long
@@ -62,6 +63,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +71,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
new file mode 100644
index 0000000000..cab7233c98
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-plus-0816
+label:
+ en_US: qwen-math-plus-0816
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
new file mode 100644
index 0000000000..f82fba0c01
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-plus-0919
+label:
+ en_US: qwen-math-plus-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
new file mode 100644
index 0000000000..e2fb6e0e55
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-plus-latest
+label:
+ en_US: qwen-math-plus-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
new file mode 100644
index 0000000000..8803e747e5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-plus
+label:
+ en_US: qwen-math-plus
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
new file mode 100644
index 0000000000..0dc5a066f0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-turbo-0919
+label:
+ en_US: qwen-math-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); larger values make the output more random, lower values make it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
new file mode 100644
index 0000000000..2ac0e4692a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-turbo-latest
+label:
+ en_US: qwen-math-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature lowers the peak of the distribution, allowing more low-probability words to be selected and making the output more diverse; a lower temperature sharpens the peak, making high-probability words more likely to be chosen and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
new file mode 100644
index 0000000000..9a7f1312e9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@@ -0,0 +1,83 @@
+model: qwen-math-turbo
+label:
+ en_US: qwen-math-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
index 7c90afecf5..c0eef37557 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@@ -8,7 +8,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 8192
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
index 935a16ebcb..c12444bd7b 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@@ -8,7 +8,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 8192
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
index c39799a71f..173c55b6b9 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@@ -8,7 +8,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 8192
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
new file mode 100644
index 0000000000..692a38140d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@@ -0,0 +1,85 @@
+model: qwen-max-0919
+label:
+ en_US: qwen-max-0919
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
new file mode 100644
index 0000000000..afd7fb4b77
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@@ -0,0 +1,85 @@
+model: qwen-max-latest
+label:
+ en_US: qwen-max-latest
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
index 1c705670ca..d02ba7af18 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -8,7 +8,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 32768
+ context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
@@ -22,9 +22,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8000
min: 1
- max: 2000
+ max: 8000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
index 64094effbb..c6a64dc507 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@@ -8,7 +8,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
- context_size: 8192
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,13 +70,16 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
- input: '0.04'
- output: '0.12'
+ input: '0.02'
+ output: '0.06'
unit: '0.001'
currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
index 7940be9e8b..1111298c37 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@@ -6,7 +6,7 @@ features:
- agent-thought
model_properties:
mode: completion
- context_size: 32768
+ context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8000
min: 1
- max: 2000
+ max: 8000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
index 0e02526beb..ef8dd083ad 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@@ -6,7 +6,7 @@ features:
- agent-thought
model_properties:
mode: completion
- context_size: 32768
+ context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8000
min: 1
- max: 2000
+ max: 8000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
index 65175f1b10..87a4417df5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@@ -1,12 +1,12 @@
-model: qwen-plus-0806
+model: qwen-plus-0723
label:
- en_US: qwen-plus-0806
+ en_US: qwen-plus-0723
model_type: llm
features:
- agent-thought
model_properties:
mode: completion
- context_size: 32768
+ context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8000
min: 1
- max: 2000
+ max: 8000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
index 1c530dcba2..967f258fa9 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8192
min: 1
- max: 2000
+ max: 8192
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
new file mode 100644
index 0000000000..9d44852ac9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@@ -0,0 +1,83 @@
+model: qwen-plus-0919
+label:
+ en_US: qwen-plus-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0008'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
index bc848072ed..df9448ae04 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
@@ -79,3 +83,4 @@ pricing:
output: '0.012'
unit: '0.001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
new file mode 100644
index 0000000000..32ccb8d615
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@@ -0,0 +1,83 @@
+model: qwen-plus-latest
+label:
+ en_US: qwen-plus-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0008'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
index e78b77c7f2..f3fce30209 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@@ -3,9 +3,11 @@ label:
en_US: qwen-plus
model_type: llm
features:
+ - multi-tool-call
- agent-thought
+ - stream-tool-call
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -20,9 +22,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 2000
+ default: 8192
min: 1
- max: 2000
+ max: 8192
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,13 +70,16 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
- input: '0.004'
- output: '0.012'
+ input: '0.0008'
+ output: '0.002'
unit: '0.001'
currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
index 2c9857cf9f..bf976b518a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -5,8 +5,8 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
- context_size: 8192
+ mode: chat
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 1500
+ default: 2000
min: 1
- max: 1500
+ max: 2000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
index 7ea5afc795..060e7fb4c9 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -5,8 +5,8 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
- context_size: 8192
+ mode: chat
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 1500
+ default: 2000
min: 1
- max: 1500
+ max: 2000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
new file mode 100644
index 0000000000..97cd34929b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -0,0 +1,83 @@
+model: qwen-turbo-0919
+label:
+ en_US: qwen-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0003'
+ output: '0.0006'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
index f1950577ec..8d77ba7a2a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -62,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +70,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
@@ -79,3 +83,4 @@ pricing:
output: '0.006'
unit: '0.001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
new file mode 100644
index 0000000000..4458c706aa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -0,0 +1,83 @@
+model: qwen-turbo-latest
+label:
+ en_US: qwen-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value lowers the peak of the probability distribution, allowing more low-probability words to be selected and making the output more diverse, while a lower temperature value sharpens the peak of the probability distribution, making high-probability words easier to select and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0, 1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled from during generation. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the model's output. Supports unsigned 64-bit integers; the default value is 1234. When a seed is specified, the model will try to generate the same or similar results, but identical results are not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0006'
+ output: '0.0003'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
index d4c03100ec..33f05967c2 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
@@ -3,10 +3,12 @@ label:
en_US: qwen-turbo
model_type: llm
features:
+ - multi-tool-call
- agent-thought
+ - stream-tool-call
model_properties:
- mode: completion
- context_size: 8192
+ mode: chat
+ context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
@@ -20,9 +22,9 @@ parameter_rules:
- name: max_tokens
use_template: max_tokens
type: int
- default: 1500
+ default: 2000
min: 1
- max: 1500
+ max: 2000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -60,6 +62,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,13 +70,16 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
- input: '0.002'
- output: '0.006'
+ input: '0.0006'
+ output: '0.0003'
unit: '0.001'
currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
index fffd732ca5..63b6074d0d 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -45,3 +45,4 @@ pricing:
output: '0.02'
unit: '0.001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
index af8742b981..12e9e0dd56 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -7,8 +7,17 @@ features:
- agent-thought
model_properties:
mode: chat
- context_size: 32768
+ context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature flattens the distribution so that more low-probability words can be selected and the output is more diverse; a lower temperature sharpens the distribution so that high-probability words are more likely to be chosen and the output is more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -28,6 +37,16 @@ parameter_rules:
help:
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: seed
required: false
type: int
@@ -40,6 +59,27 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty is applied.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
index a93d456428..b811fdece4 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
@@ -7,8 +7,17 @@ features:
- agent-thought
model_properties:
mode: chat
- context_size: 32768
+ context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature flattens the distribution so that more low-probability words can be selected and the output is more diverse; a lower temperature sharpens the distribution so that high-probability words are more likely to be chosen and the output is more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -28,6 +37,16 @@ parameter_rules:
help:
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: seed
required: false
type: int
@@ -40,6 +59,27 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty is applied.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
new file mode 100644
index 0000000000..188dea389a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -0,0 +1,87 @@
+model: qwen-vl-plus-0201
+label:
+ en_US: qwen-vl-plus-0201
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature flattens the distribution so that more low-probability words can be selected and the output is more diverse; a lower temperature sharpens the distribution so that high-probability words are more likely to be chosen and the output is more deterministic.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, with a value of 0.8, only the smallest set of the most likely tokens whose cumulative probability is at least 0.8 is kept as the candidate set. The value range is (0, 1.0); a larger value makes the output more random, a smaller value makes it more deterministic.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty is applied.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.02'
+ unit: '0.001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
index 12573511b9..bc623e2f03 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -9,6 +9,15 @@ model_properties:
mode: chat
context_size: 32768
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature flattens the distribution so that more low-probability words can be selected and the output is more diverse; a lower temperature sharpens the distribution so that high-probability words are more likely to be chosen and the output is more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -28,6 +37,16 @@ parameter_rules:
help:
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: seed
required: false
type: int
@@ -40,6 +59,27 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty is applied.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
index 13468c44ee..8977e12e4f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
@@ -7,8 +7,17 @@ features:
- agent-thought
model_properties:
mode: chat
- context_size: 8192
+ context_size: 8000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature flattens the distribution so that more low-probability words can be selected and the output is more diverse; a lower temperature sharpens the distribution so that high-probability words are more likely to be chosen and the output is more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -28,6 +37,16 @@ parameter_rules:
help:
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: seed
required: false
type: int
@@ -40,6 +59,27 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty is applied.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
index 8b204ff1f0..de237842af 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 4096
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
index 3875a274e7..1fda35abaf 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 4096
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
index 0920806845..06fd33c5f4 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 4096
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
index 824954323b..ebf8099553 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 32768
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
index c0a4b45be6..e9bc99339d 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 32768
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
index 92b67804e8..3ed85dade8 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
index 960438e3e7..328519c168 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
index 59a8827d9e..d1ed3c2a73 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 32768
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
index f14ee2daff..0e88c24aa8 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
index 8ea8166358..35313cd1f7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
index 8ea8166358..35313cd1f7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -5,7 +5,7 @@ model_type: llm
features:
- agent-thought
model_properties:
- mode: completion
+ mode: chat
context_size: 131072
parameter_rules:
- name: temperature
@@ -60,6 +60,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -67,6 +68,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
index de2c289c94..fabe6d90e6 100644
--- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
@@ -20,6 +20,7 @@ supported_model_types:
- text-embedding
configurate_methods:
- predefined-model
+ - customizable-model
provider_credential_schema:
credential_form_schemas:
- variable: dashscope_api_key
@@ -30,3 +31,20 @@ provider_credential_schema:
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter full model name
+ zh_Hans: 输入模型全称
+ credential_form_schemas:
+ - variable: dashscope_api_key
+ required: true
+ label:
+ en_US: API Key
+ type: secret-input
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py
index 4fadda5df5..286640079b 100644
--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -19,7 +19,6 @@ from openai.types.chat.chat_completion_message import FunctionCall
from openai.types.completion import Completion
from xinference_client.client.restful.restful_client import (
Client,
- RESTfulChatglmCppChatModelHandle,
RESTfulChatModelHandle,
RESTfulGenerateModelHandle,
)
@@ -491,7 +490,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
if tools and len(tools) > 0:
generate_config["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools]
vision = credentials.get("support_vision", False)
- if isinstance(xinference_model, RESTfulChatModelHandle | RESTfulChatglmCppChatModelHandle):
+ if isinstance(xinference_model, RESTfulChatModelHandle):
resp = client.chat.completions.create(
model=credentials["model_uid"],
messages=[self._convert_prompt_message_to_dict(message) for message in prompt_messages],
diff --git a/api/core/model_runtime/model_providers/xinference/tts/tts.py b/api/core/model_runtime/model_providers/xinference/tts/tts.py
index 10538b5788..81dbe397d2 100644
--- a/api/core/model_runtime/model_providers/xinference/tts/tts.py
+++ b/api/core/model_runtime/model_providers/xinference/tts/tts.py
@@ -208,21 +208,21 @@ class XinferenceText2SpeechModel(TTSModel):
executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences)))
futures = [
executor.submit(
- handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=False
+ handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=True
)
for i in range(len(sentences))
]
for future in futures:
response = future.result()
- for i in range(0, len(response), 1024):
- yield response[i : i + 1024]
+ for chunk in response:
+ yield chunk
else:
response = handle.speech(
- input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=False
+ input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=True
)
- for i in range(0, len(response), 1024):
- yield response[i : i + 1024]
+ for chunk in response:
+ yield chunk
except Exception as ex:
raise InvokeBadRequestError(str(ex))
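For reference, a minimal sketch (not part of the patch) of the two consumption patterns this hunk switches between, assuming `handle.speech` returns the complete audio as bytes when called with stream=False and an iterator of byte chunks when called with stream=True; the helper names are illustrative only.

```python
# Illustrative only; `handle` stands in for the xinference audio model handle used above.

def consume_non_streaming(handle, text, voice):
    # stream=False is assumed to return the full MP3 payload as bytes,
    # which then has to be re-chunked manually before yielding.
    audio = handle.speech(input=text, voice=voice, response_format="mp3", speed=1.0, stream=False)
    for i in range(0, len(audio), 1024):
        yield audio[i : i + 1024]

def consume_streaming(handle, text, voice):
    # stream=True is assumed to return an iterator of byte chunks,
    # so each chunk can be forwarded to the caller as it arrives.
    response = handle.speech(input=text, voice=voice, response_format="mp3", speed=1.0, stream=True)
    yield from response
```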
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index b1f9b7485c..7fcf692202 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.1'
output: '0.1'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index 4e7d5fd3cc..fcd7c7768c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index 14f17db5d6..c9ae5abf19 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index 3361474d73..98c4f72c72 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0'
output: '0'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index bf0135d198..0b5391ce2f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 8192
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index ab4b32dd82..62f453fb77 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.1'
output: '0.1'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index d1b01731f5..350b080c3f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -49,6 +49,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index 9ede308f18..2d7ebd71cf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 28286580a7..3a1120ff37 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index 4c5fa24034..14b9623e5a 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py
index bf9b093cb3..fc71d64714 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py
@@ -1,7 +1,8 @@
from .__version__ import __version__
from ._client import ZhipuAI
-from .core._errors import (
+from .core import (
APIAuthenticationError,
+ APIConnectionError,
APIInternalError,
APIReachLimitError,
APIRequestFailedError,
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py
index 659f38d7ff..51f8c49ecb 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py
@@ -1 +1 @@
-__version__ = "v2.0.1"
+__version__ = "v2.1.0"
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py
index df9e506095..705d371e62 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py
@@ -9,15 +9,13 @@ from httpx import Timeout
from typing_extensions import override
from . import api_resource
-from .core import _jwt_token
-from .core._base_type import NOT_GIVEN, NotGiven
-from .core._errors import ZhipuAIError
-from .core._http_client import ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient
+from .core import NOT_GIVEN, ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient, NotGiven, ZhipuAIError, _jwt_token
class ZhipuAI(HttpClient):
- chat: api_resource.chat
+ chat: api_resource.chat.Chat
api_key: str
+ _disable_token_cache: bool = True
def __init__(
self,
@@ -28,10 +26,15 @@ class ZhipuAI(HttpClient):
max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES,
http_client: httpx.Client | None = None,
custom_headers: Mapping[str, str] | None = None,
+ disable_token_cache: bool = True,
+ _strict_response_validation: bool = False,
) -> None:
if api_key is None:
- raise ZhipuAIError("No api_key provided, please provide it through parameters or environment variables")
+ api_key = os.environ.get("ZHIPUAI_API_KEY")
+ if api_key is None:
+ raise ZhipuAIError("No api_key provided; pass it as a parameter or set the ZHIPUAI_API_KEY environment variable")
self.api_key = api_key
+ self._disable_token_cache = disable_token_cache
if base_url is None:
base_url = os.environ.get("ZHIPUAI_BASE_URL")
@@ -42,21 +45,31 @@ class ZhipuAI(HttpClient):
super().__init__(
version=__version__,
base_url=base_url,
+ max_retries=max_retries,
timeout=timeout,
custom_httpx_client=http_client,
custom_headers=custom_headers,
+ _strict_response_validation=_strict_response_validation,
)
self.chat = api_resource.chat.Chat(self)
self.images = api_resource.images.Images(self)
self.embeddings = api_resource.embeddings.Embeddings(self)
self.files = api_resource.files.Files(self)
self.fine_tuning = api_resource.fine_tuning.FineTuning(self)
+ self.batches = api_resource.Batches(self)
+ self.knowledge = api_resource.Knowledge(self)
+ self.tools = api_resource.Tools(self)
+ self.videos = api_resource.Videos(self)
+ self.assistant = api_resource.Assistant(self)
@property
@override
- def _auth_headers(self) -> dict[str, str]:
+ def auth_headers(self) -> dict[str, str]:
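+ # With token caching disabled the raw API key is sent as the Bearer credential; otherwise a JWT generated from the key is used.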
api_key = self.api_key
- return {"Authorization": f"{_jwt_token.generate_token(api_key)}"}
+ if self._disable_token_cache:
+ return {"Authorization": f"Bearer {api_key}"}
+ else:
+ return {"Authorization": f"Bearer {_jwt_token.generate_token(api_key)}"}
def __del__(self) -> None:
if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close") or not hasattr(self, "_client"):
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py
index 0a90e21e48..4fe0719dde 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py
@@ -1,5 +1,34 @@
-from .chat import chat
+from .assistant import (
+ Assistant,
+)
+from .batches import Batches
+from .chat import (
+ AsyncCompletions,
+ Chat,
+ Completions,
+)
from .embeddings import Embeddings
-from .files import Files
-from .fine_tuning import fine_tuning
+from .files import Files, FilesWithRawResponse
+from .fine_tuning import FineTuning
from .images import Images
+from .knowledge import Knowledge
+from .tools import Tools
+from .videos import (
+ Videos,
+)
+
+__all__ = [
+ "Videos",
+ "AsyncCompletions",
+ "Chat",
+ "Completions",
+ "Images",
+ "Embeddings",
+ "Files",
+ "FilesWithRawResponse",
+ "FineTuning",
+ "Batches",
+ "Knowledge",
+ "Tools",
+ "Assistant",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py
new file mode 100644
index 0000000000..ce619aa7f0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py
@@ -0,0 +1,3 @@
+from .assistant import Assistant
+
+__all__ = ["Assistant"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py
new file mode 100644
index 0000000000..f772340a82
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import httpx
+
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ StreamResponse,
+ deepcopy_minimal,
+ make_request_options,
+ maybe_transform,
+)
+from ...types.assistant import AssistantCompletion
+from ...types.assistant.assistant_conversation_resp import ConversationUsageListResp
+from ...types.assistant.assistant_support_resp import AssistantSupportResp
+
+if TYPE_CHECKING:
+ from ..._client import ZhipuAI
+
+from ...types.assistant import assistant_conversation_params, assistant_create_params
+
+__all__ = ["Assistant"]
+
+
+class Assistant(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def conversation(
+ self,
+ assistant_id: str,
+ model: str,
+ messages: list[assistant_create_params.ConversationMessage],
+ *,
+ stream: bool = True,
+ conversation_id: Optional[str] = None,
+ attachments: Optional[list[assistant_create_params.AssistantAttachments]] = None,
+ metadata: dict | None = None,
+ request_id: str = None,
+ user_id: str = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> StreamResponse[AssistantCompletion]:
+ body = deepcopy_minimal(
+ {
+ "assistant_id": assistant_id,
+ "model": model,
+ "messages": messages,
+ "stream": stream,
+ "conversation_id": conversation_id,
+ "attachments": attachments,
+ "metadata": metadata,
+ "request_id": request_id,
+ "user_id": user_id,
+ }
+ )
+ return self._post(
+ "/assistant",
+ body=maybe_transform(body, assistant_create_params.AssistantParameters),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=AssistantCompletion,
+ stream=stream or True,
+ stream_cls=StreamResponse[AssistantCompletion],
+ )
+
+ def query_support(
+ self,
+ *,
+ assistant_id_list: list[str] = None,
+ request_id: str = None,
+ user_id: str = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AssistantSupportResp:
+ body = deepcopy_minimal(
+ {
+ "assistant_id_list": assistant_id_list,
+ "request_id": request_id,
+ "user_id": user_id,
+ }
+ )
+ return self._post(
+ "/assistant/list",
+ body=body,
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=AssistantSupportResp,
+ )
+
+ def query_conversation_usage(
+ self,
+ assistant_id: str,
+ page: int = 1,
+ page_size: int = 10,
+ *,
+ request_id: str = None,
+ user_id: str = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ConversationUsageListResp:
+ body = deepcopy_minimal(
+ {
+ "assistant_id": assistant_id,
+ "page": page,
+ "page_size": page_size,
+ "request_id": request_id,
+ "user_id": user_id,
+ }
+ )
+ return self._post(
+ "/assistant/conversation/list",
+ body=maybe_transform(body, assistant_conversation_params.ConversationParameters),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=ConversationUsageListResp,
+ )
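
A minimal usage sketch of the new `assistant` resource, assuming a valid API key; the import path, assistant ID, model name, and message payload shape below are illustrative placeholders:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")

# conversation() streams; iterate the StreamResponse for AssistantCompletion chunks
stream = client.assistant.conversation(
    assistant_id="<assistant-id>",            # placeholder
    model="glm-4-assistant",                  # placeholder model name
    messages=[{"role": "user", "content": [{"type": "text", "text": "Hello"}]}],  # payload shape assumed
    stream=True,
)
for chunk in stream:
    print(chunk)
```
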
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py
new file mode 100644
index 0000000000..ae2f2be85e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py
@@ -0,0 +1,146 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Literal, Optional
+
+import httpx
+
+from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options, maybe_transform
+from ..core.pagination import SyncCursorPage
+from ..types import batch_create_params, batch_list_params
+from ..types.batch import Batch
+
+if TYPE_CHECKING:
+ from .._client import ZhipuAI
+
+
+class Batches(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def create(
+ self,
+ *,
+ completion_window: str | None = None,
+ endpoint: Literal["/v1/chat/completions", "/v1/embeddings"],
+ input_file_id: str,
+ metadata: Optional[dict[str, str]] | NotGiven = NOT_GIVEN,
+ auto_delete_input_file: bool = True,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ return self._post(
+ "/batches",
+ body=maybe_transform(
+ {
+ "completion_window": completion_window,
+ "endpoint": endpoint,
+ "input_file_id": input_file_id,
+ "metadata": metadata,
+ "auto_delete_input_file": auto_delete_input_file,
+ },
+ batch_create_params.BatchCreateParams,
+ ),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=Batch,
+ )
+
+ def retrieve(
+ self,
+ batch_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Retrieves a batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get(
+ f"/batches/{batch_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=Batch,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[Batch]:
+ """List your organization's batches.
+
+ Args:
+ after: A cursor for use in pagination.
+
+ `after` is an object ID that defines your place
+ in the list. For instance, if you make a list request and receive 100 objects,
+ ending with obj_foo, your subsequent call can include after=obj_foo in order to
+ fetch the next page of the list.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/batches",
+ page=SyncCursorPage[Batch],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ batch_list_params.BatchListParams,
+ ),
+ ),
+ model=Batch,
+ )
+
+ def cancel(
+ self,
+ batch_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Cancels an in-progress batch.
+
+ Args:
+ batch_id: The ID of the batch to cancel.
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._post(
+ f"/batches/{batch_id}/cancel",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=Batch,
+ )
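
A usage sketch of the new `batches` resource; the file ID and `completion_window` value are placeholders, and field names such as `.id`/`.status` as well as page iteration are assumed from the `Batch` model and the pagination helper:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
batch = client.batches.create(
    input_file_id="<uploaded-file-id>",       # placeholder
    endpoint="/v1/chat/completions",
    completion_window="24h",                  # assumed value; check provider docs
    auto_delete_input_file=True,
)
print(client.batches.retrieve(batch.id).status)

# list() returns a SyncCursorPage; `after` is a cursor (e.g. the last batch id seen)
for b in client.batches.list(limit=20):
    print(b.id)
```
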
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py
index e69de29bb2..5cd8dc6f33 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py
@@ -0,0 +1,5 @@
+from .async_completions import AsyncCompletions
+from .chat import Chat
+from .completions import Completions
+
+__all__ = ["AsyncCompletions", "Chat", "Completions"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
index 1f80119739..05510a3ec4 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
@@ -1,13 +1,25 @@
from __future__ import annotations
+import logging
from typing import TYPE_CHECKING, Literal, Optional, Union
import httpx
-from ...core._base_api import BaseAPI
-from ...core._base_type import NOT_GIVEN, Headers, NotGiven
-from ...core._http_client import make_user_request_input
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ drop_prefix_image_data,
+ make_request_options,
+ maybe_transform,
+)
from ...types.chat.async_chat_completion import AsyncCompletion, AsyncTaskStatus
+from ...types.chat.code_geex import code_geex_params
+from ...types.sensitive_word_check import SensitiveWordCheckRequest
+
+logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from ..._client import ZhipuAI
@@ -22,6 +34,7 @@ class AsyncCompletions(BaseAPI):
*,
model: str,
request_id: Optional[str] | NotGiven = NOT_GIVEN,
+ user_id: Optional[str] | NotGiven = NOT_GIVEN,
do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -29,50 +42,74 @@ class AsyncCompletions(BaseAPI):
seed: int | NotGiven = NOT_GIVEN,
messages: Union[str, list[str], list[int], list[list[int]], None],
stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN,
- sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN,
+ sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
tools: Optional[object] | NotGiven = NOT_GIVEN,
tool_choice: str | NotGiven = NOT_GIVEN,
+ meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN,
+ extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
- disable_strict_validation: Optional[bool] | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncTaskStatus:
_cast_type = AsyncTaskStatus
+ logger.debug(f"temperature:{temperature}, top_p:{top_p}")
+ if temperature is not None and temperature != NOT_GIVEN:
+ if temperature <= 0:
+ do_sample = False
+ temperature = 0.01
+                # logger.warning("temperature must be in the open interval (0.0, 1.0); do_sample is overridden to False (top_p and temperature are ignored)")  # noqa: E501
+ if temperature >= 1:
+ temperature = 0.99
+                # logger.warning("temperature must be in the open interval (0.0, 1.0)")
+ if top_p is not None and top_p != NOT_GIVEN:
+ if top_p >= 1:
+ top_p = 0.99
+                # logger.warning("top_p must be in the open interval (0.0, 1.0) and cannot equal 0 or 1")
+ if top_p <= 0:
+ top_p = 0.01
+                # logger.warning("top_p must be in the open interval (0.0, 1.0) and cannot equal 0 or 1")
- if disable_strict_validation:
- _cast_type = object
+        logger.debug(f"after clamping: temperature:{temperature}, top_p:{top_p}")
+ if isinstance(messages, list):
+ for item in messages:
+ if item.get("content"):
+ item["content"] = drop_prefix_image_data(item["content"])
+
+ body = {
+ "model": model,
+ "request_id": request_id,
+ "user_id": user_id,
+ "temperature": temperature,
+ "top_p": top_p,
+ "do_sample": do_sample,
+ "max_tokens": max_tokens,
+ "seed": seed,
+ "messages": messages,
+ "stop": stop,
+ "sensitive_word_check": sensitive_word_check,
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "meta": meta,
+ "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra),
+ }
return self._post(
"/async/chat/completions",
- body={
- "model": model,
- "request_id": request_id,
- "temperature": temperature,
- "top_p": top_p,
- "do_sample": do_sample,
- "max_tokens": max_tokens,
- "seed": seed,
- "messages": messages,
- "stop": stop,
- "sensitive_word_check": sensitive_word_check,
- "tools": tools,
- "tool_choice": tool_choice,
- },
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ body=body,
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=_cast_type,
- enable_stream=False,
+ stream=False,
)
def retrieve_completion_result(
self,
id: str,
extra_headers: Headers | None = None,
- disable_strict_validation: Optional[bool] | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Union[AsyncCompletion, AsyncTaskStatus]:
_cast_type = Union[AsyncCompletion, AsyncTaskStatus]
- if disable_strict_validation:
- _cast_type = object
return self._get(
path=f"/async-result/{id}",
cast_type=_cast_type,
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
)
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py
index 92362fc50a..b3cc46566c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py
@@ -1,17 +1,18 @@
from typing import TYPE_CHECKING
-from ...core._base_api import BaseAPI
+from ...core import BaseAPI, cached_property
from .async_completions import AsyncCompletions
from .completions import Completions
if TYPE_CHECKING:
- from ..._client import ZhipuAI
+ pass
class Chat(BaseAPI):
- completions: Completions
+ @cached_property
+ def completions(self) -> Completions:
+ return Completions(self._client)
- def __init__(self, client: "ZhipuAI") -> None:
- super().__init__(client)
- self.completions = Completions(client)
- self.asyncCompletions = AsyncCompletions(client)
+ @cached_property
+ def asyncCompletions(self) -> AsyncCompletions: # noqa: N802
+ return AsyncCompletions(self._client)
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
index ec29f33864..8e5bb454e6 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
@@ -1,15 +1,28 @@
from __future__ import annotations
+import logging
from typing import TYPE_CHECKING, Literal, Optional, Union
import httpx
-from ...core._base_api import BaseAPI
-from ...core._base_type import NOT_GIVEN, Headers, NotGiven
-from ...core._http_client import make_user_request_input
-from ...core._sse_client import StreamResponse
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ StreamResponse,
+ deepcopy_minimal,
+ drop_prefix_image_data,
+ make_request_options,
+ maybe_transform,
+)
from ...types.chat.chat_completion import Completion
from ...types.chat.chat_completion_chunk import ChatCompletionChunk
+from ...types.chat.code_geex import code_geex_params
+from ...types.sensitive_word_check import SensitiveWordCheckRequest
+
+logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from ..._client import ZhipuAI
@@ -24,6 +37,7 @@ class Completions(BaseAPI):
*,
model: str,
request_id: Optional[str] | NotGiven = NOT_GIVEN,
+ user_id: Optional[str] | NotGiven = NOT_GIVEN,
do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -32,23 +46,43 @@ class Completions(BaseAPI):
seed: int | NotGiven = NOT_GIVEN,
messages: Union[str, list[str], list[int], object, None],
stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN,
- sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN,
+ sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
tools: Optional[object] | NotGiven = NOT_GIVEN,
tool_choice: str | NotGiven = NOT_GIVEN,
+ meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN,
+ extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
- disable_strict_validation: Optional[bool] | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Completion | StreamResponse[ChatCompletionChunk]:
- _cast_type = Completion
- _stream_cls = StreamResponse[ChatCompletionChunk]
- if disable_strict_validation:
- _cast_type = object
- _stream_cls = StreamResponse[object]
- return self._post(
- "/chat/completions",
- body={
+ logger.debug(f"temperature:{temperature}, top_p:{top_p}")
+ if temperature is not None and temperature != NOT_GIVEN:
+ if temperature <= 0:
+ do_sample = False
+ temperature = 0.01
+                # logger.warning("temperature must be in the open interval (0.0, 1.0); do_sample is overridden to False (top_p and temperature are ignored)")  # noqa: E501
+ if temperature >= 1:
+ temperature = 0.99
+                # logger.warning("temperature must be in the open interval (0.0, 1.0)")
+ if top_p is not None and top_p != NOT_GIVEN:
+ if top_p >= 1:
+ top_p = 0.99
+                # logger.warning("top_p must be in the open interval (0.0, 1.0) and cannot equal 0 or 1")
+ if top_p <= 0:
+ top_p = 0.01
+                # logger.warning("top_p must be in the open interval (0.0, 1.0) and cannot equal 0 or 1")
+
+        logger.debug(f"after clamping: temperature:{temperature}, top_p:{top_p}")
+ if isinstance(messages, list):
+ for item in messages:
+ if item.get("content"):
+ item["content"] = drop_prefix_image_data(item["content"])
+
+ body = deepcopy_minimal(
+ {
"model": model,
"request_id": request_id,
+ "user_id": user_id,
"temperature": temperature,
"top_p": top_p,
"do_sample": do_sample,
@@ -60,11 +94,15 @@ class Completions(BaseAPI):
"stream": stream,
"tools": tools,
"tool_choice": tool_choice,
- },
- options=make_user_request_input(
- extra_headers=extra_headers,
- ),
- cast_type=_cast_type,
- enable_stream=stream or False,
- stream_cls=_stream_cls,
+ "meta": meta,
+ "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra),
+ }
+ )
+ return self._post(
+ "/chat/completions",
+ body=body,
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=Completion,
+ stream=stream or False,
+ stream_cls=StreamResponse[ChatCompletionChunk],
)
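
A usage sketch of the reworked `chat.completions.create`: temperature/top_p are now clamped into the open interval (0.0, 1.0) on the client side and `extra_body` replaces `disable_strict_validation`; the response field access mirrors the OpenAI-style `Completion` model and is an assumption:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
response = client.chat.completions.create(
    model="glm-4",
    messages=[{"role": "user", "content": "Say hi"}],
    temperature=1.5,   # clamped to 0.99 before the request is sent
    stream=False,
)
print(response.choices[0].message.content)  # field names assumed from the Completion model
```
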
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py
index 2308a20451..4b4baef942 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py
@@ -4,9 +4,7 @@ from typing import TYPE_CHECKING, Optional, Union
import httpx
-from ..core._base_api import BaseAPI
-from ..core._base_type import NOT_GIVEN, Headers, NotGiven
-from ..core._http_client import make_user_request_input
+from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options
from ..types.embeddings import EmbeddingsResponded
if TYPE_CHECKING:
@@ -22,10 +20,13 @@ class Embeddings(BaseAPI):
*,
input: Union[str, list[str], list[int], list[list[int]]],
model: Union[str],
+ dimensions: Union[int] | NotGiven = NOT_GIVEN,
encoding_format: str | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
+ request_id: Optional[str] | NotGiven = NOT_GIVEN,
sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
disable_strict_validation: Optional[bool] | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> EmbeddingsResponded:
@@ -37,11 +38,13 @@ class Embeddings(BaseAPI):
body={
"input": input,
"model": model,
+ "dimensions": dimensions,
"encoding_format": encoding_format,
"user": user,
+ "request_id": request_id,
"sensitive_word_check": sensitive_word_check,
},
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=_cast_type,
- enable_stream=False,
+ stream=False,
)
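
A usage sketch of the extended embeddings call with the new `dimensions` and `request_id` parameters; the model name and response field access are assumptions:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
emb = client.embeddings.create(
    model="embedding-3",          # placeholder model name
    input="hello world",
    dimensions=1024,
    request_id="req-123",
)
print(len(emb.data[0].embedding))  # field names assumed from EmbeddingsResponded
```
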
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py
index f2ac74bffa..ba9de75b7e 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py
@@ -1,19 +1,30 @@
from __future__ import annotations
-from typing import TYPE_CHECKING
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Literal, cast
import httpx
-from ..core._base_api import BaseAPI
-from ..core._base_type import NOT_GIVEN, FileTypes, Headers, NotGiven
-from ..core._files import is_file_content
-from ..core._http_client import make_user_request_input
-from ..types.file_object import FileObject, ListOfFileObject
+from ..core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ FileTypes,
+ Headers,
+ NotGiven,
+ _legacy_binary_response,
+ _legacy_response,
+ deepcopy_minimal,
+ extract_files,
+ make_request_options,
+ maybe_transform,
+)
+from ..types.files import FileDeleted, FileObject, ListOfFileObject, UploadDetail, file_create_params
if TYPE_CHECKING:
from .._client import ZhipuAI
-__all__ = ["Files"]
+__all__ = ["Files", "FilesWithRawResponse"]
class Files(BaseAPI):
@@ -23,30 +34,69 @@ class Files(BaseAPI):
def create(
self,
*,
- file: FileTypes,
- purpose: str,
+ file: FileTypes = None,
+ upload_detail: list[UploadDetail] = None,
+ purpose: Literal["fine-tune", "retrieval", "batch"],
+ knowledge_id: str = None,
+ sentence_size: int = None,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> FileObject:
- if not is_file_content(file):
- prefix = f"Expected file input `{file!r}`"
- raise RuntimeError(
- f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(file)} instead."
- ) from None
- files = [("file", file)]
-
- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-
+ if not file and not upload_detail:
+ raise ValueError("At least one of `file` and `upload_detail` must be provided.")
+ body = deepcopy_minimal(
+ {
+ "file": file,
+ "upload_detail": upload_detail,
+ "purpose": purpose,
+ "knowledge_id": knowledge_id,
+ "sentence_size": sentence_size,
+ }
+ )
+ files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+ if files:
+ # It should be noted that the actual Content-Type header that will be
+ # sent to the server will contain a `boundary` parameter, e.g.
+ # multipart/form-data; boundary=---abc--
+ extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return self._post(
"/files",
- body={
- "purpose": purpose,
- },
+ body=maybe_transform(body, file_create_params.FileCreateParams),
files=files,
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=FileObject,
)
+ # def retrieve(
+ # self,
+ # file_id: str,
+ # *,
+ # extra_headers: Headers | None = None,
+ # extra_body: Body | None = None,
+ # timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ # ) -> FileObject:
+ # """
+ # Returns information about a specific file.
+ #
+ # Args:
+ # file_id: The ID of the file to retrieve information about
+ # extra_headers: Send extra headers
+ #
+ # extra_body: Add additional JSON properties to the request
+ #
+ # timeout: Override the client-level default timeout for this request, in seconds
+ # """
+ # if not file_id:
+ # raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ # return self._get(
+ # f"/files/{file_id}",
+ # options=make_request_options(
+ # extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
+ # ),
+ # cast_type=FileObject,
+ # )
+
def list(
self,
*,
@@ -55,13 +105,15 @@ class Files(BaseAPI):
after: str | NotGiven = NOT_GIVEN,
order: str | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ListOfFileObject:
return self._get(
"/files",
cast_type=ListOfFileObject,
- options=make_user_request_input(
+ options=make_request_options(
extra_headers=extra_headers,
+ extra_body=extra_body,
timeout=timeout,
query={
"purpose": purpose,
@@ -71,3 +123,72 @@ class Files(BaseAPI):
},
),
)
+
+ def delete(
+ self,
+ file_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> FileDeleted:
+ """
+ Delete a file.
+
+ Args:
+ file_id: The ID of the file to delete
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return self._delete(
+ f"/files/{file_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=FileDeleted,
+ )
+
+ def content(
+ self,
+ file_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> _legacy_response.HttpxBinaryResponseContent:
+ """
+ Returns the contents of the specified file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+ return self._get(
+ f"/files/{file_id}/content",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=_legacy_binary_response.HttpxBinaryResponseContent,
+ )
+
+
+class FilesWithRawResponse:
+ def __init__(self, files: Files) -> None:
+ self._files = files
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ files.create,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ files.list,
+ )
+ self.content = _legacy_response.to_raw_response_wrapper(
+ files.content,
+ )
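
A usage sketch of the expanded `files` resource: `purpose` now accepts "fine-tune", "retrieval" and "batch", and file content can be fetched as a binary response; the `.id` field and the binary-response wrapper behavior are assumptions:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
with open("batch_input.jsonl", "rb") as f:
    file_obj = client.files.create(file=f, purpose="batch")

resp = client.files.content(file_obj.id)  # HttpxBinaryResponseContent; exact accessors depend on _legacy_binary_response
print(resp)
```
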
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py
index e69de29bb2..7c309b8341 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py
@@ -0,0 +1,5 @@
+from .fine_tuning import FineTuning
+from .jobs import Jobs
+from .models import FineTunedModels
+
+__all__ = ["Jobs", "FineTunedModels", "FineTuning"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py
index dc30bd33ed..8670f7de00 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py
@@ -1,15 +1,18 @@
from typing import TYPE_CHECKING
-from ...core._base_api import BaseAPI
+from ...core import BaseAPI, cached_property
from .jobs import Jobs
+from .models import FineTunedModels
if TYPE_CHECKING:
- from ..._client import ZhipuAI
+ pass
class FineTuning(BaseAPI):
- jobs: Jobs
+ @cached_property
+ def jobs(self) -> Jobs:
+ return Jobs(self._client)
- def __init__(self, client: "ZhipuAI") -> None:
- super().__init__(client)
- self.jobs = Jobs(client)
+ @cached_property
+ def models(self) -> FineTunedModels:
+ return FineTunedModels(self._client)
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..40777a153f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py
@@ -0,0 +1,3 @@
+from .jobs import Jobs
+
+__all__ = ["Jobs"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py
similarity index 53%
rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py
rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py
index 3d2e9208a1..8b038cadc0 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py
@@ -4,13 +4,23 @@ from typing import TYPE_CHECKING, Optional
import httpx
-from ...core._base_api import BaseAPI
-from ...core._base_type import NOT_GIVEN, Headers, NotGiven
-from ...core._http_client import make_user_request_input
-from ...types.fine_tuning import FineTuningJob, FineTuningJobEvent, ListOfFineTuningJob, job_create_params
+from ....core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ make_request_options,
+)
+from ....types.fine_tuning import (
+ FineTuningJob,
+ FineTuningJobEvent,
+ ListOfFineTuningJob,
+ job_create_params,
+)
if TYPE_CHECKING:
- from ..._client import ZhipuAI
+ from ...._client import ZhipuAI
__all__ = ["Jobs"]
@@ -29,6 +39,7 @@ class Jobs(BaseAPI):
request_id: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> FineTuningJob:
return self._post(
@@ -41,7 +52,7 @@ class Jobs(BaseAPI):
"validation_file": validation_file,
"request_id": request_id,
},
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=FineTuningJob,
)
@@ -50,11 +61,12 @@ class Jobs(BaseAPI):
fine_tuning_job_id: str,
*,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> FineTuningJob:
return self._get(
f"/fine_tuning/jobs/{fine_tuning_job_id}",
- options=make_user_request_input(extra_headers=extra_headers, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=FineTuningJob,
)
@@ -64,13 +76,15 @@ class Jobs(BaseAPI):
after: str | NotGiven = NOT_GIVEN,
limit: int | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ListOfFineTuningJob:
return self._get(
"/fine_tuning/jobs",
cast_type=ListOfFineTuningJob,
- options=make_user_request_input(
+ options=make_request_options(
extra_headers=extra_headers,
+ extra_body=extra_body,
timeout=timeout,
query={
"after": after,
@@ -79,6 +93,24 @@ class Jobs(BaseAPI):
),
)
+ def cancel(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # noqa: E501
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> FineTuningJob:
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._post(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=FineTuningJob,
+ )
+
def list_events(
self,
fine_tuning_job_id: str,
@@ -86,13 +118,15 @@ class Jobs(BaseAPI):
after: str | NotGiven = NOT_GIVEN,
limit: int | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> FineTuningJobEvent:
return self._get(
f"/fine_tuning/jobs/{fine_tuning_job_id}/events",
cast_type=FineTuningJobEvent,
- options=make_user_request_input(
+ options=make_request_options(
extra_headers=extra_headers,
+ extra_body=extra_body,
timeout=timeout,
query={
"after": after,
@@ -100,3 +134,19 @@ class Jobs(BaseAPI):
},
),
)
+
+ def delete(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> FineTuningJob:
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._delete(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=FineTuningJob,
+ )
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py
new file mode 100644
index 0000000000..d832635baf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py
@@ -0,0 +1,3 @@
+from .fine_tuned_models import FineTunedModels
+
+__all__ = ["FineTunedModels"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py
new file mode 100644
index 0000000000..29c023e3b1
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import httpx
+
+from ....core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ make_request_options,
+)
+from ....types.fine_tuning.models import FineTunedModelsStatus
+
+if TYPE_CHECKING:
+ from ...._client import ZhipuAI
+
+__all__ = ["FineTunedModels"]
+
+
+class FineTunedModels(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def delete(
+ self,
+ fine_tuned_model: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> FineTunedModelsStatus:
+ if not fine_tuned_model:
+ raise ValueError(f"Expected a non-empty value for `fine_tuned_model` but received {fine_tuned_model!r}")
+ return self._delete(
+ f"fine_tuning/fine_tuned_models/{fine_tuned_model}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=FineTunedModelsStatus,
+ )
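
A usage sketch of the new fine-tuning management endpoints (`jobs.cancel`, `jobs.delete`, and fine-tuned model deletion via the new `models` property); all IDs are placeholders:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
client.fine_tuning.jobs.cancel("<fine-tuning-job-id>")
client.fine_tuning.jobs.delete("<fine-tuning-job-id>")
client.fine_tuning.models.delete("<fine-tuned-model-name>")
```
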
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py
index 2692b093af..8ad411913f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py
@@ -4,10 +4,9 @@ from typing import TYPE_CHECKING, Optional
import httpx
-from ..core._base_api import BaseAPI
-from ..core._base_type import NOT_GIVEN, Body, Headers, NotGiven
-from ..core._http_client import make_user_request_input
+from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options
from ..types.image import ImagesResponded
+from ..types.sensitive_word_check import SensitiveWordCheckRequest
if TYPE_CHECKING:
from .._client import ZhipuAI
@@ -27,8 +26,10 @@ class Images(BaseAPI):
response_format: Optional[str] | NotGiven = NOT_GIVEN,
size: Optional[str] | NotGiven = NOT_GIVEN,
style: Optional[str] | NotGiven = NOT_GIVEN,
+ sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
request_id: Optional[str] | NotGiven = NOT_GIVEN,
+ user_id: Optional[str] | NotGiven = NOT_GIVEN,
extra_headers: Headers | None = None,
extra_body: Body | None = None,
disable_strict_validation: Optional[bool] | None = None,
@@ -45,12 +46,14 @@ class Images(BaseAPI):
"n": n,
"quality": quality,
"response_format": response_format,
+ "sensitive_word_check": sensitive_word_check,
"size": size,
"style": style,
"user": user,
+ "user_id": user_id,
"request_id": request_id,
},
- options=make_user_request_input(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
cast_type=_cast_type,
- enable_stream=False,
+ stream=False,
)
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py
new file mode 100644
index 0000000000..5a67d743c3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py
@@ -0,0 +1,3 @@
+from .knowledge import Knowledge
+
+__all__ = ["Knowledge"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py
new file mode 100644
index 0000000000..fd289e2232
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py
@@ -0,0 +1,3 @@
+from .document import Document
+
+__all__ = ["Document"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py
new file mode 100644
index 0000000000..2c4066d893
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Literal, Optional, cast
+
+import httpx
+
+from ....core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ FileTypes,
+ Headers,
+ NotGiven,
+ deepcopy_minimal,
+ extract_files,
+ make_request_options,
+ maybe_transform,
+)
+from ....types.files import UploadDetail, file_create_params
+from ....types.knowledge.document import DocumentData, DocumentObject, document_edit_params, document_list_params
+from ....types.knowledge.document.document_list_resp import DocumentPage
+
+if TYPE_CHECKING:
+ from ...._client import ZhipuAI
+
+__all__ = ["Document"]
+
+
+class Document(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def create(
+ self,
+ *,
+ file: FileTypes = None,
+ custom_separator: Optional[list[str]] = None,
+ upload_detail: list[UploadDetail] = None,
+ purpose: Literal["retrieval"],
+ knowledge_id: str = None,
+ sentence_size: int = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> DocumentObject:
+ if not file and not upload_detail:
+ raise ValueError("At least one of `file` and `upload_detail` must be provided.")
+ body = deepcopy_minimal(
+ {
+ "file": file,
+ "upload_detail": upload_detail,
+ "purpose": purpose,
+ "custom_separator": custom_separator,
+ "knowledge_id": knowledge_id,
+ "sentence_size": sentence_size,
+ }
+ )
+ files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+ if files:
+ # It should be noted that the actual Content-Type header that will be
+ # sent to the server will contain a `boundary` parameter, e.g.
+ # multipart/form-data; boundary=---abc--
+ extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+ return self._post(
+ "/files",
+ body=maybe_transform(body, file_create_params.FileCreateParams),
+ files=files,
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=DocumentObject,
+ )
+
+ def edit(
+ self,
+ document_id: str,
+ knowledge_type: str,
+ *,
+ custom_separator: Optional[list[str]] = None,
+ sentence_size: Optional[int] = None,
+ callback_url: Optional[str] = None,
+ callback_header: Optional[dict[str, str]] = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> httpx.Response:
+ """
+
+ Args:
+            document_id: Knowledge document ID
+            knowledge_type: Knowledge type:
+                1: Article knowledge; supports pdf, url, docx
+                2: Q&A knowledge - document; supports pdf, url, docx
+                3: Q&A knowledge - spreadsheet; supports xlsx
+                4: Product library - spreadsheet; supports xlsx
+                5: Custom; supports pdf, url, docx
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not document_id:
+ raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}")
+
+ body = deepcopy_minimal(
+ {
+ "id": document_id,
+ "knowledge_type": knowledge_type,
+ "custom_separator": custom_separator,
+ "sentence_size": sentence_size,
+ "callback_url": callback_url,
+ "callback_header": callback_header,
+ }
+ )
+
+ return self._put(
+ f"/document/{document_id}",
+ body=maybe_transform(body, document_edit_params.DocumentEditParams),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=httpx.Response,
+ )
+
+ def list(
+ self,
+ knowledge_id: str,
+ *,
+ purpose: str | NotGiven = NOT_GIVEN,
+ page: str | NotGiven = NOT_GIVEN,
+ limit: str | NotGiven = NOT_GIVEN,
+ order: Literal["desc", "asc"] | NotGiven = NOT_GIVEN,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> DocumentPage:
+ return self._get(
+ "/files",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "knowledge_id": knowledge_id,
+ "purpose": purpose,
+ "page": page,
+ "limit": limit,
+ "order": order,
+ },
+ document_list_params.DocumentListParams,
+ ),
+ ),
+ cast_type=DocumentPage,
+ )
+
+ def delete(
+ self,
+ document_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> httpx.Response:
+ """
+        Delete a knowledge document.
+
+ Args:
+
+            document_id: Knowledge document ID
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not document_id:
+ raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}")
+
+ return self._delete(
+ f"/document/{document_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=httpx.Response,
+ )
+
+ def retrieve(
+ self,
+ document_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> DocumentData:
+ """
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not document_id:
+ raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}")
+
+ return self._get(
+ f"/document/{document_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=DocumentData,
+ )
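
A usage sketch of knowledge-document management via the `knowledge.document` property added in knowledge.py below; IDs are placeholders and `knowledge_type="1"` stands for article knowledge:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
docs = client.knowledge.document.list("<knowledge-id>", purpose="retrieval", limit="10")
client.knowledge.document.edit("<document-id>", "1", sentence_size=300)
client.knowledge.document.delete("<document-id>")
```
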
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py
new file mode 100644
index 0000000000..fea4c73ac9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Literal, Optional
+
+import httpx
+
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ cached_property,
+ deepcopy_minimal,
+ make_request_options,
+ maybe_transform,
+)
+from ...types.knowledge import KnowledgeInfo, KnowledgeUsed, knowledge_create_params, knowledge_list_params
+from ...types.knowledge.knowledge_list_resp import KnowledgePage
+from .document import Document
+
+if TYPE_CHECKING:
+ from ..._client import ZhipuAI
+
+__all__ = ["Knowledge"]
+
+
+class Knowledge(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ @cached_property
+ def document(self) -> Document:
+ return Document(self._client)
+
+ def create(
+ self,
+ embedding_id: int,
+ name: str,
+ *,
+ customer_identifier: Optional[str] = None,
+ description: Optional[str] = None,
+ background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None,
+ icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None,
+ bucket_id: Optional[str] = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> KnowledgeInfo:
+ body = deepcopy_minimal(
+ {
+ "embedding_id": embedding_id,
+ "name": name,
+ "customer_identifier": customer_identifier,
+ "description": description,
+ "background": background,
+ "icon": icon,
+ "bucket_id": bucket_id,
+ }
+ )
+ return self._post(
+ "/knowledge",
+ body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=KnowledgeInfo,
+ )
+
+ def modify(
+ self,
+ knowledge_id: str,
+ embedding_id: int,
+ *,
+ name: str,
+ description: Optional[str] = None,
+ background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None,
+ icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> httpx.Response:
+ body = deepcopy_minimal(
+ {
+ "id": knowledge_id,
+ "embedding_id": embedding_id,
+ "name": name,
+ "description": description,
+ "background": background,
+ "icon": icon,
+ }
+ )
+ return self._put(
+ f"/knowledge/{knowledge_id}",
+ body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=httpx.Response,
+ )
+
+ def query(
+ self,
+ *,
+ page: int | NotGiven = 1,
+ size: int | NotGiven = 10,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> KnowledgePage:
+ return self._get(
+ "/knowledge",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "page": page,
+ "size": size,
+ },
+ knowledge_list_params.KnowledgeListParams,
+ ),
+ ),
+ cast_type=KnowledgePage,
+ )
+
+ def delete(
+ self,
+ knowledge_id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> httpx.Response:
+ """
+        Delete a knowledge base.
+
+ Args:
+            knowledge_id: Knowledge base ID
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not knowledge_id:
+ raise ValueError("Expected a non-empty value for `knowledge_id`")
+
+ return self._delete(
+ f"/knowledge/{knowledge_id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=httpx.Response,
+ )
+
+ def used(
+ self,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> KnowledgeUsed:
+ """
+        Returns the knowledge base capacity usage.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/knowledge/capacity",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=KnowledgeUsed,
+ )
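
A usage sketch of the new `knowledge` resource (knowledge-base CRUD plus a document upload); the `embedding_id` value and the `.id` field on the returned object are assumptions:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
kb = client.knowledge.create(embedding_id=3, name="product-docs", description="demo")
with open("manual.pdf", "rb") as f:
    client.knowledge.document.create(file=f, purpose="retrieval", knowledge_id=kb.id)  # kb.id assumed from KnowledgeInfo
print(client.knowledge.query(page=1, size=10))
print(client.knowledge.used())
client.knowledge.delete(kb.id)
```
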
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py
new file mode 100644
index 0000000000..43e4e37da1
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py
@@ -0,0 +1,3 @@
+from .tools import Tools
+
+__all__ = ["Tools"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py
new file mode 100644
index 0000000000..3c3a630aff
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Literal, Optional, Union
+
+import httpx
+
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ StreamResponse,
+ deepcopy_minimal,
+ make_request_options,
+ maybe_transform,
+)
+from ...types.tools import WebSearch, WebSearchChunk, tools_web_search_params
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+ from ..._client import ZhipuAI
+
+__all__ = ["Tools"]
+
+
+class Tools(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def web_search(
+ self,
+ *,
+ model: str,
+ request_id: Optional[str] | NotGiven = NOT_GIVEN,
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+ messages: Union[str, list[str], list[int], object, None],
+ scope: Optional[str] | NotGiven = NOT_GIVEN,
+ location: Optional[str] | NotGiven = NOT_GIVEN,
+ recent_days: Optional[int] | NotGiven = NOT_GIVEN,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> WebSearch | StreamResponse[WebSearchChunk]:
+ body = deepcopy_minimal(
+ {
+ "model": model,
+ "request_id": request_id,
+ "messages": messages,
+ "stream": stream,
+ "scope": scope,
+ "location": location,
+ "recent_days": recent_days,
+ }
+ )
+ return self._post(
+ "/tools",
+ body=maybe_transform(body, tools_web_search_params.WebSearchParams),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=WebSearch,
+ stream=stream or False,
+ stream_cls=StreamResponse[WebSearchChunk],
+ )
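
A usage sketch of the new `tools.web_search` helper; the model name is a placeholder following the provider's web-search tool naming:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
result = client.tools.web_search(
    model="web-search-pro",       # placeholder tool model name
    messages=[{"role": "user", "content": "Latest stable Python release?"}],
    stream=False,
)
print(result)
```
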
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py
new file mode 100644
index 0000000000..6b0f99ed09
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py
@@ -0,0 +1,7 @@
+from .videos import (
+ Videos,
+)
+
+__all__ = [
+ "Videos",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py
new file mode 100644
index 0000000000..f1f1c08036
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import httpx
+
+from ...core import (
+ NOT_GIVEN,
+ BaseAPI,
+ Body,
+ Headers,
+ NotGiven,
+ deepcopy_minimal,
+ make_request_options,
+ maybe_transform,
+)
+from ...types.sensitive_word_check import SensitiveWordCheckRequest
+from ...types.video import VideoObject, video_create_params
+
+if TYPE_CHECKING:
+ from ..._client import ZhipuAI
+
+__all__ = ["Videos"]
+
+
+class Videos(BaseAPI):
+ def __init__(self, client: ZhipuAI) -> None:
+ super().__init__(client)
+
+ def generations(
+ self,
+ model: str,
+ *,
+ prompt: str = None,
+ image_url: str = None,
+ sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
+ request_id: str = None,
+ user_id: str = None,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> VideoObject:
+        if not model and not prompt:
+ raise ValueError("At least one of `model` and `prompt` must be provided.")
+ body = deepcopy_minimal(
+ {
+ "model": model,
+ "prompt": prompt,
+ "image_url": image_url,
+ "sensitive_word_check": sensitive_word_check,
+ "request_id": request_id,
+ "user_id": user_id,
+ }
+ )
+ return self._post(
+ "/videos/generations",
+ body=maybe_transform(body, video_create_params.VideoCreateParams),
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=VideoObject,
+ )
+
+ def retrieve_videos_result(
+ self,
+ id: str,
+ *,
+ extra_headers: Headers | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> VideoObject:
+ if not id:
+            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+
+ return self._get(
+ f"/async-result/{id}",
+ options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
+ cast_type=VideoObject,
+ )
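
A usage sketch of the new `videos` resource: generation is asynchronous, so the task is polled via `retrieve_videos_result`; the model name and the `.id`/`.task_status` fields are assumptions:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk._client import ZhipuAI  # vendored path, illustrative

client = ZhipuAI(api_key="<key-id>.<key-secret>")
task = client.videos.generations(
    model="cogvideox",            # placeholder model name
    prompt="A cat surfing a wave at sunset",
)
result = client.videos.retrieve_videos_result(task.id)  # fields assumed from VideoObject
print(result.task_status)
```
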
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py
index e69de29bb2..3d6466d279 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py
@@ -0,0 +1,108 @@
+from ._base_api import BaseAPI
+from ._base_compat import (
+ PYDANTIC_V2,
+ ConfigDict,
+ GenericModel,
+ cached_property,
+ field_get_default,
+ get_args,
+ get_model_config,
+ get_model_fields,
+ get_origin,
+ is_literal_type,
+ is_union,
+ parse_obj,
+)
+from ._base_models import BaseModel, construct_type
+from ._base_type import (
+ NOT_GIVEN,
+ Body,
+ FileTypes,
+ Headers,
+ IncEx,
+ ModelT,
+ NotGiven,
+ Query,
+)
+from ._constants import (
+ ZHIPUAI_DEFAULT_LIMITS,
+ ZHIPUAI_DEFAULT_MAX_RETRIES,
+ ZHIPUAI_DEFAULT_TIMEOUT,
+)
+from ._errors import (
+ APIAuthenticationError,
+ APIConnectionError,
+ APIInternalError,
+ APIReachLimitError,
+ APIRequestFailedError,
+ APIResponseError,
+ APIResponseValidationError,
+ APIServerFlowExceedError,
+ APIStatusError,
+ APITimeoutError,
+ ZhipuAIError,
+)
+from ._files import is_file_content
+from ._http_client import HttpClient, make_request_options
+from ._sse_client import StreamResponse
+from ._utils import (
+ deepcopy_minimal,
+ drop_prefix_image_data,
+ extract_files,
+ is_given,
+ is_list,
+ is_mapping,
+ maybe_transform,
+ parse_date,
+ parse_datetime,
+)
+
+__all__ = [
+ "BaseModel",
+ "construct_type",
+ "BaseAPI",
+ "NOT_GIVEN",
+ "Headers",
+ "NotGiven",
+ "Body",
+ "IncEx",
+ "ModelT",
+ "Query",
+ "FileTypes",
+ "PYDANTIC_V2",
+ "ConfigDict",
+ "GenericModel",
+ "get_args",
+ "is_union",
+ "parse_obj",
+ "get_origin",
+ "is_literal_type",
+ "get_model_config",
+ "get_model_fields",
+ "field_get_default",
+ "is_file_content",
+ "ZhipuAIError",
+ "APIStatusError",
+ "APIRequestFailedError",
+ "APIAuthenticationError",
+ "APIReachLimitError",
+ "APIInternalError",
+ "APIServerFlowExceedError",
+ "APIResponseError",
+ "APIResponseValidationError",
+ "APITimeoutError",
+ "make_request_options",
+ "HttpClient",
+ "ZHIPUAI_DEFAULT_TIMEOUT",
+ "ZHIPUAI_DEFAULT_MAX_RETRIES",
+ "ZHIPUAI_DEFAULT_LIMITS",
+ "is_list",
+ "is_mapping",
+ "parse_date",
+ "parse_datetime",
+ "is_given",
+ "maybe_transform",
+ "deepcopy_minimal",
+ "extract_files",
+ "StreamResponse",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py
index 10b46ff8e3..3592ea6bac 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py
@@ -16,3 +16,4 @@ class BaseAPI:
self._post = client.post
self._put = client.put
self._patch = client.patch
+ self._get_api_list = client.get_api_list
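
For context, resource classes such as `Videos` above only use the HTTP verbs that `BaseAPI` binds from the client. A rough sketch of that pattern follows; the endpoint, the `ExampleObject` model, and the relative import path are hypothetical.

```python
import httpx

# Sketch only (not from the SDK); the relative import path depends on placement.
from ..core import NOT_GIVEN, BaseAPI, BaseModel, Body, Headers, NotGiven, make_request_options


class ExampleObject(BaseModel):  # hypothetical response model
    id: str


class Examples(BaseAPI):
    def retrieve(
        self,
        id: str,
        *,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ExampleObject:
        if not id:
            raise ValueError("`id` must be provided.")
        return self._get(
            f"/examples/{id}",  # hypothetical endpoint
            options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout),
            cast_type=ExampleObject,
        )
```
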
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py
new file mode 100644
index 0000000000..92a5d683be
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py
@@ -0,0 +1,209 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from datetime import date, datetime
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, overload
+
+import pydantic
+from pydantic.fields import FieldInfo
+from typing_extensions import Self
+
+from ._base_type import StrBytesIntFloat
+
+_T = TypeVar("_T")
+_ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel)
+
+# --------------- Pydantic v2 compatibility ---------------
+
+# Pyright incorrectly reports some of our functions as overriding a method when they don't
+# pyright: reportIncompatibleMethodOverride=false
+
+PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
+
+# v1 re-exports
+if TYPE_CHECKING:
+
+ def parse_date(value: date | StrBytesIntFloat) -> date: ...
+
+ def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime: ...
+
+ def get_args(t: type[Any]) -> tuple[Any, ...]: ...
+
+ def is_union(tp: type[Any] | None) -> bool: ...
+
+ def get_origin(t: type[Any]) -> type[Any] | None: ...
+
+ def is_literal_type(type_: type[Any]) -> bool: ...
+
+ def is_typeddict(type_: type[Any]) -> bool: ...
+
+else:
+ if PYDANTIC_V2:
+ from pydantic.v1.typing import ( # noqa: I001
+ get_args as get_args, # noqa: PLC0414
+ is_union as is_union, # noqa: PLC0414
+ get_origin as get_origin, # noqa: PLC0414
+ is_typeddict as is_typeddict, # noqa: PLC0414
+ is_literal_type as is_literal_type, # noqa: PLC0414
+ )
+ from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414
+ else:
+ from pydantic.typing import ( # noqa: I001
+ get_args as get_args, # noqa: PLC0414
+ is_union as is_union, # noqa: PLC0414
+ get_origin as get_origin, # noqa: PLC0414
+ is_typeddict as is_typeddict, # noqa: PLC0414
+ is_literal_type as is_literal_type, # noqa: PLC0414
+ )
+ from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414
+
+
+# refactored config
+if TYPE_CHECKING:
+ from pydantic import ConfigDict
+else:
+ if PYDANTIC_V2:
+ from pydantic import ConfigDict
+ else:
+ # TODO: provide an error message here?
+ ConfigDict = None
+
+
+# renamed methods / properties
+def parse_obj(model: type[_ModelT], value: object) -> _ModelT:
+ if PYDANTIC_V2:
+ return model.model_validate(value)
+ else:
+ # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+ return cast(_ModelT, model.parse_obj(value))
+
+
+def field_is_required(field: FieldInfo) -> bool:
+ if PYDANTIC_V2:
+ return field.is_required()
+ return field.required # type: ignore
+
+
+def field_get_default(field: FieldInfo) -> Any:
+ value = field.get_default()
+ if PYDANTIC_V2:
+ from pydantic_core import PydanticUndefined
+
+ if value == PydanticUndefined:
+ return None
+ return value
+ return value
+
+
+def field_outer_type(field: FieldInfo) -> Any:
+ if PYDANTIC_V2:
+ return field.annotation
+ return field.outer_type_ # type: ignore
+
+
+def get_model_config(model: type[pydantic.BaseModel]) -> Any:
+ if PYDANTIC_V2:
+ return model.model_config
+ return model.__config__ # type: ignore
+
+
+def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]:
+ if PYDANTIC_V2:
+ return model.model_fields
+ return model.__fields__ # type: ignore
+
+
+def model_copy(model: _ModelT) -> _ModelT:
+ if PYDANTIC_V2:
+ return model.model_copy()
+ return model.copy() # type: ignore
+
+
+def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str:
+ if PYDANTIC_V2:
+ return model.model_dump_json(indent=indent)
+ return model.json(indent=indent) # type: ignore
+
+
+def model_dump(
+ model: pydantic.BaseModel,
+ *,
+ exclude_unset: bool = False,
+ exclude_defaults: bool = False,
+) -> dict[str, Any]:
+ if PYDANTIC_V2:
+ return model.model_dump(
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ )
+ return cast(
+ "dict[str, Any]",
+ model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ ),
+ )
+
+
+def model_parse(model: type[_ModelT], data: Any) -> _ModelT:
+ if PYDANTIC_V2:
+ return model.model_validate(data)
+ return model.parse_obj(data) # pyright: ignore[reportDeprecated]
+
+
+# generic models
+if TYPE_CHECKING:
+
+ class GenericModel(pydantic.BaseModel): ...
+
+else:
+ if PYDANTIC_V2:
+ # there no longer needs to be a distinction in v2 but
+ # we still have to create our own subclass to avoid
+ # inconsistent MRO ordering errors
+ class GenericModel(pydantic.BaseModel): ...
+
+ else:
+ import pydantic.generics
+
+ class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ...
+
+
+# cached properties
+if TYPE_CHECKING:
+ cached_property = property
+
+ # we define a separate type (copied from typeshed)
+ # that represents that `cached_property` is `set`able
+ # at runtime, which differs from `@property`.
+ #
+ # this is a separate type as editors likely special case
+ # `@property` and we don't want to cause issues just to have
+ # more helpful internal types.
+
+ class typed_cached_property(Generic[_T]): # noqa: N801
+ func: Callable[[Any], _T]
+ attrname: str | None
+
+ def __init__(self, func: Callable[[Any], _T]) -> None: ...
+
+ @overload
+ def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ...
+
+ @overload
+ def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ...
+
+ def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self:
+ raise NotImplementedError()
+
+ def __set_name__(self, owner: type[Any], name: str) -> None: ...
+
+ # __set__ is not defined at runtime, but @cached_property is designed to be settable
+ def __set__(self, instance: object, value: _T) -> None: ...
+else:
+ try:
+ from functools import cached_property
+ except ImportError:
+ from cached_property import cached_property
+
+ typed_cached_property = cached_property
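
As a quick illustration, the shims above let the rest of the SDK stay agnostic to the installed Pydantic major version. The `User` model below is illustrative only, and the relative import assumes the sketch lives inside the same package.

```python
import pydantic

# Sketch: the same calls work whether Pydantic v1 or v2 is installed.
from ._base_compat import field_get_default, get_model_fields, model_json, parse_obj


class User(pydantic.BaseModel):
    name: str
    age: int = 0


user = parse_obj(User, {"name": "Ada"})         # model_validate (v2) vs parse_obj (v1)
fields = get_model_fields(User)                 # model_fields (v2) vs __fields__ (v1)
default_age = field_get_default(fields["age"])  # -> 0
as_json = model_json(user)                      # model_dump_json (v2) vs json (v1)
```
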
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
new file mode 100644
index 0000000000..6d8ba700b7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
@@ -0,0 +1,670 @@
+from __future__ import annotations
+
+import inspect
+import os
+from collections.abc import Callable
+from datetime import date, datetime
+from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, TypeGuard, TypeVar, cast
+
+import pydantic
+import pydantic.generics
+from pydantic.fields import FieldInfo
+from typing_extensions import (
+ ParamSpec,
+ Protocol,
+ override,
+ runtime_checkable,
+)
+
+from ._base_compat import (
+ PYDANTIC_V2,
+ ConfigDict,
+ field_get_default,
+ get_args,
+ get_model_config,
+ get_model_fields,
+ get_origin,
+ is_literal_type,
+ is_union,
+ parse_obj,
+)
+from ._base_compat import (
+ GenericModel as BaseGenericModel,
+)
+from ._base_type import (
+ IncEx,
+ ModelT,
+)
+from ._utils import (
+ PropertyInfo,
+ coerce_boolean,
+ extract_type_arg,
+ is_annotated_type,
+ is_list,
+ is_mapping,
+ parse_date,
+ parse_datetime,
+ strip_annotated_type,
+)
+
+if TYPE_CHECKING:
+ from pydantic_core.core_schema import LiteralSchema, ModelField, ModelFieldsSchema
+
+__all__ = ["BaseModel", "GenericModel"]
+_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel")
+
+_T = TypeVar("_T")
+P = ParamSpec("P")
+
+
+@runtime_checkable
+class _ConfigProtocol(Protocol):
+ allow_population_by_field_name: bool
+
+
+class BaseModel(pydantic.BaseModel):
+ if PYDANTIC_V2:
+ model_config: ClassVar[ConfigDict] = ConfigDict(
+ extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true"))
+ )
+ else:
+
+ @property
+ @override
+ def model_fields_set(self) -> set[str]:
+ # a forwards-compat shim for pydantic v2
+ return self.__fields_set__ # type: ignore
+
+ class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated]
+ extra: Any = pydantic.Extra.allow # type: ignore
+
+ def to_dict(
+ self,
+ *,
+ mode: Literal["json", "python"] = "python",
+ use_api_names: bool = True,
+ exclude_unset: bool = True,
+ exclude_defaults: bool = False,
+ exclude_none: bool = False,
+ warnings: bool = True,
+ ) -> dict[str, object]:
+ """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
+
+ By default, fields that were not set by the API will not be included,
+ and keys will match the API response, *not* the property names from the model.
+
+ For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property,
+ the output will use the `"fooBar"` key (unless `use_api_names=False` is passed).
+
+ Args:
+ mode:
+ If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`.
+ If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)`
+
+ use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`.
+ exclude_unset: Whether to exclude fields that have not been explicitly set.
+ exclude_defaults: Whether to exclude fields that are set to their default value from the output.
+ exclude_none: Whether to exclude fields that have a value of `None` from the output.
+ warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2.
+ """ # noqa: E501
+ return self.model_dump(
+ mode=mode,
+ by_alias=use_api_names,
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ exclude_none=exclude_none,
+ warnings=warnings,
+ )
+
+ def to_json(
+ self,
+ *,
+ indent: int | None = 2,
+ use_api_names: bool = True,
+ exclude_unset: bool = True,
+ exclude_defaults: bool = False,
+ exclude_none: bool = False,
+ warnings: bool = True,
+ ) -> str:
+ """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation).
+
+ By default, fields that were not set by the API will not be included,
+ and keys will match the API response, *not* the property names from the model.
+
+ For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property,
+ the output will use the `"fooBar"` key (unless `use_api_names=False` is passed).
+
+ Args:
+ indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2`
+ use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`.
+ exclude_unset: Whether to exclude fields that have not been explicitly set.
+ exclude_defaults: Whether to exclude fields that have the default value.
+ exclude_none: Whether to exclude fields that have a value of `None`.
+ warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2.
+ """ # noqa: E501
+ return self.model_dump_json(
+ indent=indent,
+ by_alias=use_api_names,
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ exclude_none=exclude_none,
+ warnings=warnings,
+ )
+
+ @override
+ def __str__(self) -> str:
+ # mypy complains about an invalid self arg
+ return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc]
+
+ # Override the 'construct' method in a way that supports recursive parsing without validation.
+ # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
+ @classmethod
+ @override
+ def construct(
+ cls: type[ModelT],
+ _fields_set: set[str] | None = None,
+ **values: object,
+ ) -> ModelT:
+ m = cls.__new__(cls)
+ fields_values: dict[str, object] = {}
+
+ config = get_model_config(cls)
+ populate_by_name = (
+ config.allow_population_by_field_name
+ if isinstance(config, _ConfigProtocol)
+ else config.get("populate_by_name")
+ )
+
+ if _fields_set is None:
+ _fields_set = set()
+
+ model_fields = get_model_fields(cls)
+ for name, field in model_fields.items():
+ key = field.alias
+ if key is None or (key not in values and populate_by_name):
+ key = name
+
+ if key in values:
+ fields_values[name] = _construct_field(value=values[key], field=field, key=key)
+ _fields_set.add(name)
+ else:
+ fields_values[name] = field_get_default(field)
+
+ _extra = {}
+ for key, value in values.items():
+ if key not in model_fields:
+ if PYDANTIC_V2:
+ _extra[key] = value
+ else:
+ _fields_set.add(key)
+ fields_values[key] = value
+
+ object.__setattr__(m, "__dict__", fields_values) # noqa: PLC2801
+
+ if PYDANTIC_V2:
+ # these properties are copied from Pydantic's `model_construct()` method
+ object.__setattr__(m, "__pydantic_private__", None) # noqa: PLC2801
+ object.__setattr__(m, "__pydantic_extra__", _extra) # noqa: PLC2801
+ object.__setattr__(m, "__pydantic_fields_set__", _fields_set) # noqa: PLC2801
+ else:
+ # init_private_attributes() does not exist in v2
+ m._init_private_attributes() # type: ignore
+
+ # copied from Pydantic v1's `construct()` method
+ object.__setattr__(m, "__fields_set__", _fields_set) # noqa: PLC2801
+
+ return m
+
+ if not TYPE_CHECKING:
+ # type checkers incorrectly complain about this assignment
+ # because the type signatures are technically different
+ # although not in practice
+ model_construct = construct
+
+ if not PYDANTIC_V2:
+ # we define aliases for some of the new pydantic v2 methods so
+ # that we can just document these methods without having to specify
+ # a specific pydantic version as some users may not know which
+ # pydantic version they are currently using
+
+ @override
+ def model_dump(
+ self,
+ *,
+ mode: Literal["json", "python"] | str = "python",
+ include: IncEx = None,
+ exclude: IncEx = None,
+ by_alias: bool = False,
+ exclude_unset: bool = False,
+ exclude_defaults: bool = False,
+ exclude_none: bool = False,
+ round_trip: bool = False,
+ warnings: bool | Literal["none", "warn", "error"] = True,
+ context: dict[str, Any] | None = None,
+ serialize_as_any: bool = False,
+ ) -> dict[str, Any]:
+ """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump
+
+ Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
+
+ Args:
+ mode: The mode in which `to_python` should run.
+ If mode is 'json', the dictionary will only contain JSON serializable types.
+ If mode is 'python', the dictionary may contain any Python objects.
+ include: A list of fields to include in the output.
+ exclude: A list of fields to exclude from the output.
+ by_alias: Whether to use the field's alias in the dictionary key if defined.
+ exclude_unset: Whether to exclude fields that are unset or None from the output.
+ exclude_defaults: Whether to exclude fields that are set to their default value from the output.
+ exclude_none: Whether to exclude fields that have a value of `None` from the output.
+ round_trip: Whether to enable serialization and deserialization round-trip support.
+ warnings: Whether to log warnings when invalid fields are encountered.
+
+ Returns:
+ A dictionary representation of the model.
+ """
+ if mode != "python":
+ raise ValueError("mode is only supported in Pydantic v2")
+ if round_trip != False:
+ raise ValueError("round_trip is only supported in Pydantic v2")
+ if warnings != True:
+ raise ValueError("warnings is only supported in Pydantic v2")
+ if context is not None:
+ raise ValueError("context is only supported in Pydantic v2")
+ if serialize_as_any != False:
+ raise ValueError("serialize_as_any is only supported in Pydantic v2")
+ return super().dict( # pyright: ignore[reportDeprecated]
+ include=include,
+ exclude=exclude,
+ by_alias=by_alias,
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ exclude_none=exclude_none,
+ )
+
+ @override
+ def model_dump_json(
+ self,
+ *,
+ indent: int | None = None,
+ include: IncEx = None,
+ exclude: IncEx = None,
+ by_alias: bool = False,
+ exclude_unset: bool = False,
+ exclude_defaults: bool = False,
+ exclude_none: bool = False,
+ round_trip: bool = False,
+ warnings: bool | Literal["none", "warn", "error"] = True,
+ context: dict[str, Any] | None = None,
+ serialize_as_any: bool = False,
+ ) -> str:
+ """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json
+
+ Generates a JSON representation of the model using Pydantic's `to_json` method.
+
+ Args:
+ indent: Indentation to use in the JSON output. If None is passed, the output will be compact.
+ include: Field(s) to include in the JSON output. Can take either a string or set of strings.
+ exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings.
+ by_alias: Whether to serialize using field aliases.
+ exclude_unset: Whether to exclude fields that have not been explicitly set.
+ exclude_defaults: Whether to exclude fields that have the default value.
+ exclude_none: Whether to exclude fields that have a value of `None`.
+ round_trip: Whether to use serialization/deserialization between JSON and class instance.
+ warnings: Whether to show any warnings that occurred during serialization.
+
+ Returns:
+ A JSON string representation of the model.
+ """
+ if round_trip != False:
+ raise ValueError("round_trip is only supported in Pydantic v2")
+ if warnings != True:
+ raise ValueError("warnings is only supported in Pydantic v2")
+ if context is not None:
+ raise ValueError("context is only supported in Pydantic v2")
+ if serialize_as_any != False:
+ raise ValueError("serialize_as_any is only supported in Pydantic v2")
+ return super().json( # type: ignore[reportDeprecated]
+ indent=indent,
+ include=include,
+ exclude=exclude,
+ by_alias=by_alias,
+ exclude_unset=exclude_unset,
+ exclude_defaults=exclude_defaults,
+ exclude_none=exclude_none,
+ )
+
+
+def _construct_field(value: object, field: FieldInfo, key: str) -> object:
+ if value is None:
+ return field_get_default(field)
+
+ if PYDANTIC_V2:
+ type_ = field.annotation
+ else:
+ type_ = cast(type, field.outer_type_) # type: ignore
+
+ if type_ is None:
+        raise RuntimeError(f"Unexpected field type None for {key}")
+
+ return construct_type(value=value, type_=type_)
+
+
+def is_basemodel(type_: type) -> bool:
+ """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`"""
+ if is_union(type_):
+ return any(is_basemodel(variant) for variant in get_args(type_))
+
+ return is_basemodel_type(type_)
+
+
+def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]:
+ origin = get_origin(type_) or type_
+ return issubclass(origin, BaseModel) or issubclass(origin, GenericModel)
+
+
+def build(
+ base_model_cls: Callable[P, _BaseModelT],
+ *args: P.args,
+ **kwargs: P.kwargs,
+) -> _BaseModelT:
+ """Construct a BaseModel class without validation.
+
+ This is useful for cases where you need to instantiate a `BaseModel`
+ from an API response as this provides type-safe params which isn't supported
+ by helpers like `construct_type()`.
+
+ ```py
+ build(MyModel, my_field_a="foo", my_field_b=123)
+ ```
+ """
+ if args:
+ raise TypeError(
+ "Received positional arguments which are not supported; Keyword arguments must be used instead",
+ )
+
+ return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs))
+
+
+def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T:
+ """Loose coercion to the expected type with construction of nested values.
+
+ Note: the returned value from this function is not guaranteed to match the
+ given type.
+ """
+ return cast(_T, construct_type(value=value, type_=type_))
+
+
+def construct_type(*, value: object, type_: type) -> object:
+ """Loose coercion to the expected type with construction of nested values.
+
+ If the given value does not match the expected type then it is returned as-is.
+ """
+ # we allow `object` as the input type because otherwise, passing things like
+ # `Literal['value']` will be reported as a type error by type checkers
+ type_ = cast("type[object]", type_)
+
+ # unwrap `Annotated[T, ...]` -> `T`
+ if is_annotated_type(type_):
+ meta: tuple[Any, ...] = get_args(type_)[1:]
+ type_ = extract_type_arg(type_, 0)
+ else:
+ meta = ()
+ # we need to use the origin class for any types that are subscripted generics
+ # e.g. Dict[str, object]
+ origin = get_origin(type_) or type_
+ args = get_args(type_)
+
+ if is_union(origin):
+ try:
+ return validate_type(type_=cast("type[object]", type_), value=value)
+ except Exception:
+ pass
+
+ # if the type is a discriminated union then we want to construct the right variant
+ # in the union, even if the data doesn't match exactly, otherwise we'd break code
+ # that relies on the constructed class types, e.g.
+ #
+ # class FooType:
+ # kind: Literal['foo']
+ # value: str
+ #
+ # class BarType:
+ # kind: Literal['bar']
+ # value: int
+ #
+ # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then
+ # we'd end up constructing `FooType` when it should be `BarType`.
+ discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta)
+ if discriminator and is_mapping(value):
+ variant_value = value.get(discriminator.field_alias_from or discriminator.field_name)
+ if variant_value and isinstance(variant_value, str):
+ variant_type = discriminator.mapping.get(variant_value)
+ if variant_type:
+ return construct_type(type_=variant_type, value=value)
+
+ # if the data is not valid, use the first variant that doesn't fail while deserializing
+ for variant in args:
+ try:
+ return construct_type(value=value, type_=variant)
+ except Exception:
+ continue
+
+ raise RuntimeError(f"Could not convert data into a valid instance of {type_}")
+ if origin == dict:
+ if not is_mapping(value):
+ return value
+
+ _, items_type = get_args(type_) # Dict[_, items_type]
+ return {key: construct_type(value=item, type_=items_type) for key, item in value.items()}
+
+ if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)):
+ if is_list(value):
+ return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value]
+
+ if is_mapping(value):
+ if issubclass(type_, BaseModel):
+ return type_.construct(**value) # type: ignore[arg-type]
+
+ return cast(Any, type_).construct(**value)
+
+ if origin == list:
+ if not is_list(value):
+ return value
+
+ inner_type = args[0] # List[inner_type]
+ return [construct_type(value=entry, type_=inner_type) for entry in value]
+
+ if origin == float:
+ if isinstance(value, int):
+ coerced = float(value)
+ if coerced != value:
+ return value
+ return coerced
+
+ return value
+
+ if type_ == datetime:
+ try:
+ return parse_datetime(value) # type: ignore
+ except Exception:
+ return value
+
+ if type_ == date:
+ try:
+ return parse_date(value) # type: ignore
+ except Exception:
+ return value
+
+ return value
+
+
+@runtime_checkable
+class CachedDiscriminatorType(Protocol):
+ __discriminator__: DiscriminatorDetails
+
+
+class DiscriminatorDetails:
+ field_name: str
+ """The name of the discriminator field in the variant class, e.g.
+
+ ```py
+ class Foo(BaseModel):
+ type: Literal['foo']
+ ```
+
+ Will result in field_name='type'
+ """
+
+ field_alias_from: str | None
+ """The name of the discriminator field in the API response, e.g.
+
+ ```py
+ class Foo(BaseModel):
+ type: Literal['foo'] = Field(alias='type_from_api')
+ ```
+
+ Will result in field_alias_from='type_from_api'
+ """
+
+ mapping: dict[str, type]
+ """Mapping of discriminator value to variant type, e.g.
+
+ {'foo': FooVariant, 'bar': BarVariant}
+ """
+
+ def __init__(
+ self,
+ *,
+ mapping: dict[str, type],
+ discriminator_field: str,
+ discriminator_alias: str | None,
+ ) -> None:
+ self.mapping = mapping
+ self.field_name = discriminator_field
+ self.field_alias_from = discriminator_alias
+
+
+def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None:
+ if isinstance(union, CachedDiscriminatorType):
+ return union.__discriminator__
+
+ discriminator_field_name: str | None = None
+
+ for annotation in meta_annotations:
+ if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None:
+ discriminator_field_name = annotation.discriminator
+ break
+
+ if not discriminator_field_name:
+ return None
+
+ mapping: dict[str, type] = {}
+ discriminator_alias: str | None = None
+
+ for variant in get_args(union):
+ variant = strip_annotated_type(variant)
+ if is_basemodel_type(variant):
+ if PYDANTIC_V2:
+ field = _extract_field_schema_pv2(variant, discriminator_field_name)
+ if not field:
+ continue
+
+ # Note: if one variant defines an alias then they all should
+ discriminator_alias = field.get("serialization_alias")
+
+ field_schema = field["schema"]
+
+ if field_schema["type"] == "literal":
+ for entry in cast("LiteralSchema", field_schema)["expected"]:
+ if isinstance(entry, str):
+ mapping[entry] = variant
+ else:
+ field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
+ if not field_info:
+ continue
+
+ # Note: if one variant defines an alias then they all should
+ discriminator_alias = field_info.alias
+
+ if field_info.annotation and is_literal_type(field_info.annotation):
+ for entry in get_args(field_info.annotation):
+ if isinstance(entry, str):
+ mapping[entry] = variant
+
+ if not mapping:
+ return None
+
+ details = DiscriminatorDetails(
+ mapping=mapping,
+ discriminator_field=discriminator_field_name,
+ discriminator_alias=discriminator_alias,
+ )
+ cast(CachedDiscriminatorType, union).__discriminator__ = details
+ return details
+
+
+def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None:
+ schema = model.__pydantic_core_schema__
+ if schema["type"] != "model":
+ return None
+
+ fields_schema = schema["schema"]
+ if fields_schema["type"] != "model-fields":
+ return None
+
+ fields_schema = cast("ModelFieldsSchema", fields_schema)
+
+ field = fields_schema["fields"].get(field_name)
+ if not field:
+ return None
+
+ return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast]
+
+
+def validate_type(*, type_: type[_T], value: object) -> _T:
+ """Strict validation that the given value matches the expected type"""
+ if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel):
+ return cast(_T, parse_obj(type_, value))
+
+ return cast(_T, _validate_non_model_type(type_=type_, value=value))
+
+
+# Subclassing here confuses type checkers, so we treat this class as non-inheriting.
+if TYPE_CHECKING:
+ GenericModel = BaseModel
+else:
+
+ class GenericModel(BaseGenericModel, BaseModel):
+ pass
+
+
+if PYDANTIC_V2:
+ from pydantic import TypeAdapter
+
+ def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
+ return TypeAdapter(type_).validate_python(value)
+
+elif not TYPE_CHECKING:
+
+ class TypeAdapter(Generic[_T]):
+ """Used as a placeholder to easily convert runtime types to a Pydantic format
+ to provide validation.
+
+ For example:
+ ```py
+ validated = RootModel[int](__root__="5").__root__
+ # validated: 5
+ ```
+ """
+
+ def __init__(self, type_: type[_T]):
+ self.type_ = type_
+
+ def validate_python(self, value: Any) -> _T:
+ if not isinstance(value, self.type_):
+ raise ValueError(f"Invalid type: {value} is not of type {self.type_}")
+ return value
+
+ def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
+ return TypeAdapter(type_).validate_python(value)
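
A short sketch of what the `construct()` override and `to_dict()` provide: responses can be built from API payloads without validation, and serialization keys follow the API aliases. The `Usage` model and its field are illustrative only.

```python
from pydantic import Field

from ._base_models import BaseModel  # the module shown above


# Sketch: build a model from an API payload without validation, then dump it
# back using the API's own key names.
class Usage(BaseModel):
    total_tokens: int = Field(alias="totalTokens")


usage = Usage.construct(**{"totalTokens": 42})
print(usage.total_tokens)  # 42, even though no validation ran
print(usage.to_dict())     # {'totalTokens': 42} -- keys match the API response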
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py
index 7a91f9b796..ea1d3f09dc 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py
@@ -1,11 +1,21 @@
from __future__ import annotations
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Mapping, Sequence
from os import PathLike
-from typing import IO, TYPE_CHECKING, Any, Literal, TypeVar, Union
+from typing import (
+ IO,
+ TYPE_CHECKING,
+ Any,
+ Literal,
+ Optional,
+ TypeAlias,
+ TypeVar,
+ Union,
+)
import pydantic
-from typing_extensions import override
+import httpx
+from httpx import Response
+from typing_extensions import Protocol, TypedDict, override, runtime_checkable
Query = Mapping[str, object]
Body = object
@@ -22,7 +32,7 @@ else:
# Sentinel class used until PEP 0661 is accepted
-class NotGiven(pydantic.BaseModel):
+class NotGiven:
"""
A sentinel singleton class used to distinguish omitted keyword arguments
from those passed in with the value None (which may have different behavior).
@@ -50,7 +60,7 @@ NotGivenOr = Union[_T, NotGiven]
NOT_GIVEN = NotGiven()
-class Omit(pydantic.BaseModel):
+class Omit:
"""In certain situations you need to be able to represent a case where a default value has
to be explicitly removed and `None` is not an appropriate substitute, for example:
@@ -71,37 +81,90 @@ class Omit(pydantic.BaseModel):
return False
+@runtime_checkable
+class ModelBuilderProtocol(Protocol):
+ @classmethod
+ def build(
+ cls: type[_T],
+ *,
+ response: Response,
+ data: object,
+ ) -> _T: ...
+
+
Headers = Mapping[str, Union[str, Omit]]
+
+class HeadersLikeProtocol(Protocol):
+ def get(self, __key: str) -> str | None: ...
+
+
+HeadersLike = Union[Headers, HeadersLikeProtocol]
+
ResponseT = TypeVar(
"ResponseT",
- bound="Union[str, None, BaseModel, list[Any], Dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol,"
- " BinaryResponseContent]",
+ bound="Union[str, None, BaseModel, list[Any], dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol, BinaryResponseContent]", # noqa: E501
)
+StrBytesIntFloat = Union[str, bytes, int, float]
+
+# Note: copied from Pydantic
+# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49
+IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None"
+
+PostParser = Callable[[Any], Any]
+
+
+@runtime_checkable
+class InheritsGeneric(Protocol):
+ """Represents a type that has inherited from `Generic`
+
+ The `__orig_bases__` property can be used to determine the resolved
+ type variable for a given base class.
+ """
+
+ __orig_bases__: tuple[_GenericAlias]
+
+
+class _GenericAlias(Protocol):
+ __origin__: type[object]
+
+
+class HttpxSendArgs(TypedDict, total=False):
+ auth: httpx.Auth
+
+
# for user input files
if TYPE_CHECKING:
+ Base64FileInput = Union[IO[bytes], PathLike[str]]
FileContent = Union[IO[bytes], bytes, PathLike[str]]
else:
+ Base64FileInput = Union[IO[bytes], PathLike]
FileContent = Union[IO[bytes], bytes, PathLike]
FileTypes = Union[
- FileContent, # file content
- tuple[str, FileContent], # (filename, file)
- tuple[str, FileContent, str], # (filename, file , content_type)
- tuple[str, FileContent, str, Mapping[str, str]], # (filename, file , content_type, headers)
+ # file (or bytes)
+ FileContent,
+ # (filename, file (or bytes))
+ tuple[Optional[str], FileContent],
+ # (filename, file (or bytes), content_type)
+ tuple[Optional[str], FileContent, Optional[str]],
+ # (filename, file (or bytes), content_type, headers)
+ tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
]
-
RequestFiles = Union[Mapping[str, FileTypes], Sequence[tuple[str, FileTypes]]]
-# for httpx client supported files
-
+# duplicate of the above but without our custom file support
HttpxFileContent = Union[bytes, IO[bytes]]
HttpxFileTypes = Union[
- FileContent, # file content
- tuple[str, HttpxFileContent], # (filename, file)
- tuple[str, HttpxFileContent, str], # (filename, file , content_type)
- tuple[str, HttpxFileContent, str, Mapping[str, str]], # (filename, file , content_type, headers)
+ # file (or bytes)
+ HttpxFileContent,
+ # (filename, file (or bytes))
+ tuple[Optional[str], HttpxFileContent],
+ # (filename, file (or bytes), content_type)
+ tuple[Optional[str], HttpxFileContent, Optional[str]],
+ # (filename, file (or bytes), content_type, headers)
+ tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]],
]
HttpxRequestFiles = Union[Mapping[str, HttpxFileTypes], Sequence[tuple[str, HttpxFileTypes]]]
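
The `NotGiven` sentinel (now a plain class rather than a pydantic model) is what lets callers tell an omitted argument apart from an explicit `None`. A small sketch, with an illustrative `timeout` parameter:

```python
import httpx

from ._base_type import NOT_GIVEN, NotGiven  # the module shown above


# Sketch: NOT_GIVEN means "use the default", while None is an explicit value.
def resolve_timeout(timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN) -> str:
    if isinstance(timeout, NotGiven):
        return "use the client default"
    if timeout is None:
        return "explicitly disable the timeout"
    return f"use {timeout!r}"


print(resolve_timeout())      # use the client default
print(resolve_timeout(None))  # explicitly disable the timeout
print(resolve_timeout(10.0))  # use 10.0
```
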
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py
new file mode 100644
index 0000000000..8e43bdebec
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py
@@ -0,0 +1,12 @@
+import httpx
+
+RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
+# `Timeout` controls the `connect` and `read` timeouts; defaults to `timeout=300.0, connect=8.0`
+ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0)
+# The `retry` parameter controls the number of retries; defaults to 3
+ZHIPUAI_DEFAULT_MAX_RETRIES = 3
+# `Limits` controls the maximum number of connections and keep-alive connections; defaults to `max_connections=50, max_keepalive_connections=10`
+ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=50, max_keepalive_connections=10)
+
+INITIAL_RETRY_DELAY = 0.5
+MAX_RETRY_DELAY = 8.0
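
For reference, `INITIAL_RETRY_DELAY` and `MAX_RETRY_DELAY` feed the exponential backoff in `_calculate_retry_timeout` (shown later in `_http_client.py`). Ignoring jitter, the delays work out as in this sketch:

```python
from ._constants import INITIAL_RETRY_DELAY, MAX_RETRY_DELAY

# Sketch of the backoff schedule: 0.5s, 1s, 2s, 4s, then capped at 8s.
for nb_retries in range(5):
    delay = min(INITIAL_RETRY_DELAY * 2.0**nb_retries, MAX_RETRY_DELAY)
    print(nb_retries, delay)
```
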
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py
index 1027c1bc5b..e2c9d24c6c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py
@@ -13,6 +13,7 @@ __all__ = [
"APIResponseError",
"APIResponseValidationError",
"APITimeoutError",
+ "APIConnectionError",
]
@@ -24,7 +25,7 @@ class ZhipuAIError(Exception):
super().__init__(message)
-class APIStatusError(Exception):
+class APIStatusError(ZhipuAIError):
response: httpx.Response
status_code: int
@@ -49,7 +50,7 @@ class APIInternalError(APIStatusError): ...
class APIServerFlowExceedError(APIStatusError): ...
-class APIResponseError(Exception):
+class APIResponseError(ZhipuAIError):
message: str
request: httpx.Request
json_data: object
@@ -75,9 +76,11 @@ class APIResponseValidationError(APIResponseError):
self.status_code = response.status_code
-class APITimeoutError(Exception):
- request: httpx.Request
+class APIConnectionError(APIResponseError):
+ def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None:
+ super().__init__(message, request, json_data=None)
- def __init__(self, request: httpx.Request):
- self.request = request
- super().__init__("Request Timeout")
+
+class APITimeoutError(APIConnectionError):
+ def __init__(self, request: httpx.Request) -> None:
+ super().__init__(message="Request timed out.", request=request)
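
With every error now rooted at `ZhipuAIError` (and timeouts reclassified as connection errors), callers can layer handling from specific to general. A sketch, where `client` is assumed to be a ZhipuAI client exposing the Videos resource:

```python
from ._errors import APIConnectionError, APIStatusError, APITimeoutError, ZhipuAIError


def fetch_result(client, task_id: str):
    # Sketch: specific-to-general error handling with the reworked hierarchy.
    try:
        return client.videos.retrieve_videos_result(id=task_id)  # illustrative call
    except APITimeoutError:
        raise  # now a subclass of APIConnectionError
    except APIConnectionError:
        raise  # other network-level failures
    except APIStatusError as err:
        print(err.status_code)  # 4xx/5xx responses
        raise
    except ZhipuAIError:
        raise  # any other SDK error
```
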
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py
index 0796bfe11c..f9d2e14d9e 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py
@@ -2,40 +2,74 @@ from __future__ import annotations
import io
import os
-from collections.abc import Mapping, Sequence
-from pathlib import Path
+import pathlib
+from typing import TypeGuard, overload
-from ._base_type import FileTypes, HttpxFileTypes, HttpxRequestFiles, RequestFiles
+from ._base_type import (
+ Base64FileInput,
+ FileContent,
+ FileTypes,
+ HttpxFileContent,
+ HttpxFileTypes,
+ HttpxRequestFiles,
+ RequestFiles,
+)
+from ._utils import is_mapping_t, is_sequence_t, is_tuple_t
-def is_file_content(obj: object) -> bool:
+def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
+ return isinstance(obj, io.IOBase | os.PathLike)
+
+
+def is_file_content(obj: object) -> TypeGuard[FileContent]:
return isinstance(obj, bytes | tuple | io.IOBase | os.PathLike)
+def assert_is_file_content(obj: object, *, key: str | None = None) -> None:
+ if not is_file_content(obj):
+ prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`"
+ raise RuntimeError(
+ f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/openai/openai-python/tree/main#file-uploads"
+ ) from None
+
+
+@overload
+def to_httpx_files(files: None) -> None: ...
+
+
+@overload
+def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ...
+
+
+def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
+ if files is None:
+ return None
+
+ if is_mapping_t(files):
+ files = {key: _transform_file(file) for key, file in files.items()}
+ elif is_sequence_t(files):
+ files = [(key, _transform_file(file)) for key, file in files]
+ else:
+ raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence")
+
+ return files
+
+
def _transform_file(file: FileTypes) -> HttpxFileTypes:
if is_file_content(file):
if isinstance(file, os.PathLike):
- path = Path(file)
- return path.name, path.read_bytes()
- else:
- return file
- if isinstance(file, tuple):
- if isinstance(file[1], os.PathLike):
- return (file[0], Path(file[1]).read_bytes(), *file[2:])
- else:
- return (file[0], file[1], *file[2:])
- else:
- raise TypeError(f"Unexpected input file with type {type(file)},Expected FileContent type or tuple type")
+ path = pathlib.Path(file)
+ return (path.name, path.read_bytes())
+
+ return file
+
+ if is_tuple_t(file):
+ return (file[0], _read_file_content(file[1]), *file[2:])
+
+ raise TypeError("Expected file types input to be a FileContent type or to be a tuple")
-def make_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
- if files is None:
- return None
-
- if isinstance(files, Mapping):
- files = {key: _transform_file(file) for key, file in files.items()}
- elif isinstance(files, Sequence):
- files = [(key, _transform_file(file)) for key, file in files]
- else:
- raise TypeError(f"Unexpected input file with type {type(files)}, excepted Mapping or Sequence")
- return files
+def _read_file_content(file: FileContent) -> HttpxFileContent:
+ if isinstance(file, os.PathLike):
+ return pathlib.Path(file).read_bytes()
+ return file
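
A sketch of the conversion `to_httpx_files` performs; the form field names and the file path are hypothetical.

```python
import io
import pathlib

from ._files import to_httpx_files  # the module shown above

# PathLike entries are read into (filename, bytes) tuples; raw bytes and
# file objects pass through unchanged.
files = {
    "document": pathlib.Path("report.pdf"),  # hypothetical file -> ("report.pdf", b"...")
    "raw": io.BytesIO(b"hello"),             # passed through as-is
}
httpx_files = to_httpx_files(files)
```
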
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
index 5f7f6d04f2..ffdafb85d5 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
@@ -1,23 +1,70 @@
from __future__ import annotations
import inspect
-from collections.abc import Mapping
-from typing import Any, Union, cast
+import logging
+import time
+import warnings
+from collections.abc import Iterable, Iterator, Mapping
+from itertools import starmap
+from random import random
+from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, TypeVar, Union, cast, overload
import httpx
import pydantic
from httpx import URL, Timeout
-from tenacity import retry
-from tenacity.stop import stop_after_attempt
-from . import _errors
-from ._base_type import NOT_GIVEN, AnyMapping, Body, Data, Headers, NotGiven, Query, RequestFiles, ResponseT
-from ._errors import APIResponseValidationError, APIStatusError, APITimeoutError
-from ._files import make_httpx_files
-from ._request_opt import ClientRequestParam, UserRequestInput
-from ._response import HttpResponse
+from . import _errors, get_origin
+from ._base_compat import model_copy
+from ._base_models import GenericModel, construct_type, validate_type
+from ._base_type import (
+ NOT_GIVEN,
+ AnyMapping,
+ Body,
+ Data,
+ Headers,
+ HttpxSendArgs,
+ ModelBuilderProtocol,
+ NotGiven,
+ Omit,
+ PostParser,
+ Query,
+ RequestFiles,
+ ResponseT,
+)
+from ._constants import (
+ INITIAL_RETRY_DELAY,
+ MAX_RETRY_DELAY,
+ RAW_RESPONSE_HEADER,
+ ZHIPUAI_DEFAULT_LIMITS,
+ ZHIPUAI_DEFAULT_MAX_RETRIES,
+ ZHIPUAI_DEFAULT_TIMEOUT,
+)
+from ._errors import APIConnectionError, APIResponseValidationError, APIStatusError, APITimeoutError
+from ._files import to_httpx_files
+from ._legacy_response import LegacyAPIResponse
+from ._request_opt import FinalRequestOptions, UserRequestInput
+from ._response import APIResponse, BaseAPIResponse, extract_response_type
from ._sse_client import StreamResponse
-from ._utils import flatten
+from ._utils import flatten, is_given, is_mapping
+
+log: logging.Logger = logging.getLogger(__name__)
+
+# TODO: make base page type vars covariant
+SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]")
+# AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]")
+
+_T = TypeVar("_T")
+_T_co = TypeVar("_T_co", covariant=True)
+
+if TYPE_CHECKING:
+ from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+else:
+ try:
+ from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ except ImportError:
+ # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366
+ HTTPX_DEFAULT_TIMEOUT = Timeout(5.0)
+
headers = {
"Accept": "application/json",
@@ -25,50 +72,180 @@ headers = {
}
-def _merge_map(map1: Mapping, map2: Mapping) -> Mapping:
- merged = {**map1, **map2}
- return {key: val for key, val in merged.items() if val is not None}
+class PageInfo:
+ """Stores the necessary information to build the request to retrieve the next page.
+
+ Either `url` or `params` must be set.
+ """
+
+ url: URL | NotGiven
+ params: Query | NotGiven
+
+ @overload
+ def __init__(
+ self,
+ *,
+ url: URL,
+ ) -> None: ...
+
+ @overload
+ def __init__(
+ self,
+ *,
+ params: Query,
+ ) -> None: ...
+
+ def __init__(
+ self,
+ *,
+ url: URL | NotGiven = NOT_GIVEN,
+ params: Query | NotGiven = NOT_GIVEN,
+ ) -> None:
+ self.url = url
+ self.params = params
-from itertools import starmap
+class BasePage(GenericModel, Generic[_T]):
+ """
+ Defines the core interface for pagination.
-from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ Type Args:
+ ModelT: The pydantic model that represents an item in the response.
-ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0)
-ZHIPUAI_DEFAULT_MAX_RETRIES = 3
-ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=5, max_keepalive_connections=5)
+ Methods:
+ has_next_page(): Check if there is another page available
+ next_page_info(): Get the necessary information to make a request for the next page
+ """
+
+ _options: FinalRequestOptions = pydantic.PrivateAttr()
+ _model: type[_T] = pydantic.PrivateAttr()
+
+ def has_next_page(self) -> bool:
+ items = self._get_page_items()
+ if not items:
+ return False
+ return self.next_page_info() is not None
+
+ def next_page_info(self) -> Optional[PageInfo]: ...
+
+ def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body]
+ ...
+
+ def _params_from_url(self, url: URL) -> httpx.QueryParams:
+ # TODO: do we have to preprocess params here?
+ return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params)
+
+ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
+ options = model_copy(self._options)
+ options._strip_raw_response_header()
+
+ if not isinstance(info.params, NotGiven):
+ options.params = {**options.params, **info.params}
+ return options
+
+ if not isinstance(info.url, NotGiven):
+ params = self._params_from_url(info.url)
+ url = info.url.copy_with(params=params)
+ options.params = dict(url.params)
+ options.url = str(url)
+ return options
+
+ raise ValueError("Unexpected PageInfo state")
+
+
+class BaseSyncPage(BasePage[_T], Generic[_T]):
+ _client: HttpClient = pydantic.PrivateAttr()
+
+ def _set_private_attributes(
+ self,
+ client: HttpClient,
+ model: type[_T],
+ options: FinalRequestOptions,
+ ) -> None:
+ self._model = model
+ self._client = client
+ self._options = options
+
+ # Pydantic uses a custom `__iter__` method to support casting BaseModels
+ # to dictionaries. e.g. dict(model).
+ # As we want to support `for item in page`, this is inherently incompatible
+ # with the default pydantic behavior. It is not possible to support both
+ # use cases at once. Fortunately, this is not a big deal as all other pydantic
+ # methods should continue to work as expected as there is an alternative method
+ # to cast a model to a dictionary, model.dict(), which is used internally
+ # by pydantic.
+ def __iter__(self) -> Iterator[_T]: # type: ignore
+ for page in self.iter_pages():
+ yield from page._get_page_items()
+
+ def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]:
+ page = self
+ while True:
+ yield page
+ if page.has_next_page():
+ page = page.get_next_page()
+ else:
+ return
+
+ def get_next_page(self: SyncPageT) -> SyncPageT:
+ info = self.next_page_info()
+ if not info:
+ raise RuntimeError(
+ "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`."
+ )
+
+ options = self._info_to_options(info)
+ return self._client._request_api_list(self._model, page=self.__class__, options=options)
class HttpClient:
_client: httpx.Client
_version: str
_base_url: URL
-
+ max_retries: int
timeout: Union[float, Timeout, None]
_limits: httpx.Limits
_has_custom_http_client: bool
_default_stream_cls: type[StreamResponse[Any]] | None = None
+ _strict_response_validation: bool
+
def __init__(
self,
*,
version: str,
base_url: URL,
+ _strict_response_validation: bool,
+ max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES,
timeout: Union[float, Timeout, None],
+ limits: httpx.Limits | None = None,
custom_httpx_client: httpx.Client | None = None,
custom_headers: Mapping[str, str] | None = None,
) -> None:
- if timeout is None or isinstance(timeout, NotGiven):
+ if limits is not None:
+ warnings.warn(
+                "The `limits` argument is deprecated. The `custom_httpx_client` argument should be passed instead",
+ category=DeprecationWarning,
+ stacklevel=3,
+ )
+ if custom_httpx_client is not None:
+                raise ValueError("The `custom_httpx_client` argument is mutually exclusive with `limits`")
+ else:
+ limits = ZHIPUAI_DEFAULT_LIMITS
+
+ if not is_given(timeout):
if custom_httpx_client and custom_httpx_client.timeout != HTTPX_DEFAULT_TIMEOUT:
timeout = custom_httpx_client.timeout
else:
timeout = ZHIPUAI_DEFAULT_TIMEOUT
- self.timeout = cast(Timeout, timeout)
+ self.max_retries = max_retries
+ self.timeout = timeout
+ self._limits = limits
self._has_custom_http_client = bool(custom_httpx_client)
self._client = custom_httpx_client or httpx.Client(
base_url=base_url,
timeout=self.timeout,
- limits=ZHIPUAI_DEFAULT_LIMITS,
+ limits=limits,
)
self._version = version
url = URL(url=base_url)
@@ -76,6 +253,7 @@ class HttpClient:
url = url.copy_with(raw_path=url.raw_path + b"/")
self._base_url = url
self._custom_headers = custom_headers or {}
+ self._strict_response_validation = _strict_response_validation
def _prepare_url(self, url: str) -> URL:
sub_url = URL(url)
@@ -93,42 +271,88 @@ class HttpClient:
"ZhipuAI-SDK-Ver": self._version,
"source_type": "zhipu-sdk-python",
"x-request-sdk": "zhipu-sdk-python",
- **self._auth_headers,
+ **self.auth_headers,
**self._custom_headers,
}
@property
- def _auth_headers(self):
+ def custom_auth(self) -> httpx.Auth | None:
+ return None
+
+ @property
+ def auth_headers(self):
return {}
- def _prepare_headers(self, request_param: ClientRequestParam) -> httpx.Headers:
- custom_headers = request_param.headers or {}
- headers_dict = _merge_map(self._default_headers, custom_headers)
+ def _prepare_headers(self, options: FinalRequestOptions) -> httpx.Headers:
+ custom_headers = options.headers or {}
+ headers_dict = _merge_mappings(self._default_headers, custom_headers)
httpx_headers = httpx.Headers(headers_dict)
return httpx_headers
- def _prepare_request(self, request_param: ClientRequestParam) -> httpx.Request:
+ def _remaining_retries(
+ self,
+ remaining_retries: Optional[int],
+ options: FinalRequestOptions,
+ ) -> int:
+ return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries)
+
+ def _calculate_retry_timeout(
+ self,
+ remaining_retries: int,
+ options: FinalRequestOptions,
+ response_headers: Optional[httpx.Headers] = None,
+ ) -> float:
+ max_retries = options.get_max_retries(self.max_retries)
+
+ # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
+ # retry_after = self._parse_retry_after_header(response_headers)
+ # if retry_after is not None and 0 < retry_after <= 60:
+ # return retry_after
+
+ nb_retries = max_retries - remaining_retries
+
+ # Apply exponential backoff, but not more than the max.
+ sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
+
+ # Apply some jitter, plus-or-minus half a second.
+ jitter = 1 - 0.25 * random()
+ timeout = sleep_seconds * jitter
+ return max(timeout, 0)
+
+ def _build_request(self, options: FinalRequestOptions) -> httpx.Request:
kwargs: dict[str, Any] = {}
- json_data = request_param.json_data
- headers = self._prepare_headers(request_param)
- url = self._prepare_url(request_param.url)
- json_data = request_param.json_data
+ headers = self._prepare_headers(options)
+ url = self._prepare_url(options.url)
+ json_data = options.json_data
+ if options.extra_json is not None:
+ if json_data is None:
+ json_data = cast(Body, options.extra_json)
+ elif is_mapping(json_data):
+ json_data = _merge_mappings(json_data, options.extra_json)
+ else:
+ raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
+
+ content_type = headers.get("Content-Type")
+ # multipart/form-data; boundary=---abc--
if headers.get("Content-Type") == "multipart/form-data":
- headers.pop("Content-Type")
+ if "boundary" not in content_type:
+ # only remove the header if the boundary hasn't been explicitly set
+ # as the caller doesn't want httpx to come up with their own boundary
+ headers.pop("Content-Type")
if json_data:
kwargs["data"] = self._make_multipartform(json_data)
return self._client.build_request(
headers=headers,
- timeout=self.timeout if isinstance(request_param.timeout, NotGiven) else request_param.timeout,
- method=request_param.method,
+ timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout,
+ method=options.method,
url=url,
json=json_data,
- files=request_param.files,
- params=request_param.params,
+ files=options.files,
+ params=options.params,
**kwargs,
)
@@ -170,20 +394,6 @@ class HttpClient:
serialized[key] = value
return serialized
- def _parse_response(
- self,
- *,
- cast_type: type[ResponseT],
- response: httpx.Response,
- enable_stream: bool,
- request_param: ClientRequestParam,
- stream_cls: type[StreamResponse[Any]] | None = None,
- ) -> HttpResponse:
- http_response = HttpResponse(
- raw_response=response, cast_type=cast_type, client=self, enable_stream=enable_stream, stream_cls=stream_cls
- )
- return http_response.parse()
-
def _process_response_data(
self,
*,
@@ -194,14 +404,58 @@ class HttpClient:
if data is None:
return cast(ResponseT, None)
- try:
- if inspect.isclass(cast_type) and issubclass(cast_type, pydantic.BaseModel):
- return cast(ResponseT, cast_type.validate(data))
+ if cast_type is object:
+ return cast(ResponseT, data)
- return cast(ResponseT, pydantic.TypeAdapter(cast_type).validate_python(data))
+ try:
+ if inspect.isclass(cast_type) and issubclass(cast_type, ModelBuilderProtocol):
+ return cast(ResponseT, cast_type.build(response=response, data=data))
+
+ if self._strict_response_validation:
+ return cast(ResponseT, validate_type(type_=cast_type, value=data))
+
+ return cast(ResponseT, construct_type(type_=cast_type, value=data))
except pydantic.ValidationError as err:
raise APIResponseValidationError(response=response, json_data=data) from err
+ def _should_stream_response_body(self, request: httpx.Request) -> bool:
+ return request.headers.get(RAW_RESPONSE_HEADER) == "stream" # type: ignore[no-any-return]
+
+ def _should_retry(self, response: httpx.Response) -> bool:
+ # Note: this is not a standard header
+ should_retry_header = response.headers.get("x-should-retry")
+
+ # If the server explicitly says whether or not to retry, obey.
+ if should_retry_header == "true":
+ log.debug("Retrying as header `x-should-retry` is set to `true`")
+ return True
+ if should_retry_header == "false":
+ log.debug("Not retrying as header `x-should-retry` is set to `false`")
+ return False
+
+ # Retry on request timeouts.
+ if response.status_code == 408:
+ log.debug("Retrying due to status code %i", response.status_code)
+ return True
+
+ # Retry on lock timeouts.
+ if response.status_code == 409:
+ log.debug("Retrying due to status code %i", response.status_code)
+ return True
+
+ # Retry on rate limits.
+ if response.status_code == 429:
+ log.debug("Retrying due to status code %i", response.status_code)
+ return True
+
+ # Retry internal errors.
+ if response.status_code >= 500:
+ log.debug("Retrying due to status code %i", response.status_code)
+ return True
+
+ log.debug("Not retrying")
+ return False
+
def is_closed(self) -> bool:
return self._client.is_closed
@@ -214,117 +468,385 @@ class HttpClient:
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
- @retry(stop=stop_after_attempt(ZHIPUAI_DEFAULT_MAX_RETRIES))
def request(
+ self,
+ cast_type: type[ResponseT],
+ options: FinalRequestOptions,
+ remaining_retries: Optional[int] = None,
+ *,
+ stream: bool = False,
+ stream_cls: type[StreamResponse] | None = None,
+ ) -> ResponseT | StreamResponse:
+ return self._request(
+ cast_type=cast_type,
+ options=options,
+ stream=stream,
+ stream_cls=stream_cls,
+ remaining_retries=remaining_retries,
+ )
+
+ def _request(
self,
*,
cast_type: type[ResponseT],
- params: ClientRequestParam,
- enable_stream: bool = False,
- stream_cls: type[StreamResponse[Any]] | None = None,
+ options: FinalRequestOptions,
+ remaining_retries: int | None,
+ stream: bool,
+ stream_cls: type[StreamResponse] | None,
) -> ResponseT | StreamResponse:
- request = self._prepare_request(params)
+ retries = self._remaining_retries(remaining_retries, options)
+ request = self._build_request(options)
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
try:
response = self._client.send(
request,
- stream=enable_stream,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
- response.raise_for_status()
except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if retries > 0:
+ return self._retry_request(
+ options,
+ cast_type,
+ retries,
+ stream=stream,
+ stream_cls=stream_cls,
+ response_headers=None,
+ )
+
+ log.debug("Raising timeout error")
raise APITimeoutError(request=request) from err
- except httpx.HTTPStatusError as err:
- err.response.read()
- # raise err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if retries > 0:
+ return self._retry_request(
+ options,
+ cast_type,
+ retries,
+ stream=stream,
+ stream_cls=stream_cls,
+ response_headers=None,
+ )
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+ )
+
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if retries > 0 and self._should_retry(err.response):
+ err.response.close()
+ return self._retry_request(
+ options,
+ cast_type,
+ retries,
+ err.response.headers,
+ stream=stream,
+ stream_cls=stream_cls,
+ )
+
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ err.response.read()
+
+ log.debug("Re-raising status error")
raise self._make_status_error(err.response) from None
- except Exception as err:
- raise err
-
- return self._parse_response(
+ return self._process_response(
cast_type=cast_type,
- request_param=params,
+ options=options,
response=response,
- enable_stream=enable_stream,
+ stream=stream,
stream_cls=stream_cls,
)
+ def _retry_request(
+ self,
+ options: FinalRequestOptions,
+ cast_type: type[ResponseT],
+ remaining_retries: int,
+ response_headers: httpx.Headers | None,
+ *,
+ stream: bool,
+ stream_cls: type[StreamResponse] | None,
+ ) -> ResponseT | StreamResponse:
+ remaining = remaining_retries - 1
+ if remaining == 1:
+ log.debug("1 retry left")
+ else:
+ log.debug("%i retries left", remaining)
+
+ timeout = self._calculate_retry_timeout(remaining, options, response_headers)
+ log.info("Retrying request to %s in %f seconds", options.url, timeout)
+
+ # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
+ # different thread if necessary.
+ time.sleep(timeout)
+
+ return self._request(
+ options=options,
+ cast_type=cast_type,
+ remaining_retries=remaining,
+ stream=stream,
+ stream_cls=stream_cls,
+ )
+
+ def _process_response(
+ self,
+ *,
+ cast_type: type[ResponseT],
+ options: FinalRequestOptions,
+ response: httpx.Response,
+ stream: bool,
+ stream_cls: type[StreamResponse] | None,
+ ) -> ResponseT:
+ # If the raw-response header was set, return a LegacyAPIResponse and leave parsing to the caller
+ if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
+ return cast(
+ ResponseT,
+ LegacyAPIResponse(
+ raw=response,
+ client=self,
+ cast_type=cast_type,
+ stream=stream,
+ stream_cls=stream_cls,
+ options=options,
+ ),
+ )
+
+ origin = get_origin(cast_type) or cast_type
+
+ if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+ if not issubclass(origin, APIResponse):
+ raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}")
+
+ response_cls = cast("type[BaseAPIResponse[Any]]", cast_type)
+ return cast(
+ ResponseT,
+ response_cls(
+ raw=response,
+ client=self,
+ cast_type=extract_response_type(response_cls),
+ stream=stream,
+ stream_cls=stream_cls,
+ options=options,
+ ),
+ )
+
+ if cast_type == httpx.Response:
+ return cast(ResponseT, response)
+
+ api_response = APIResponse(
+ raw=response,
+ client=self,
+ cast_type=cast("type[ResponseT]", cast_type), # pyright: ignore[reportUnnecessaryCast]
+ stream=stream,
+ stream_cls=stream_cls,
+ options=options,
+ )
+ if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
+ return cast(ResponseT, api_response)
+
+ return api_response.parse()
+
+ def _request_api_list(
+ self,
+ model: type[object],
+ page: type[SyncPageT],
+ options: FinalRequestOptions,
+ ) -> SyncPageT:
+ def _parser(resp: SyncPageT) -> SyncPageT:
+ resp._set_private_attributes(
+ client=self,
+ model=model,
+ options=options,
+ )
+ return resp
+
+ options.post_parser = _parser
+
+ return self.request(page, options, stream=False)
+
+ @overload
+ def get(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ options: UserRequestInput = {},
+ stream: Literal[False] = False,
+ ) -> ResponseT: ...
+
+ @overload
+ def get(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ options: UserRequestInput = {},
+ stream: Literal[True],
+ stream_cls: type[StreamResponse],
+ ) -> StreamResponse: ...
+
+ @overload
+ def get(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ options: UserRequestInput = {},
+ stream: bool,
+ stream_cls: type[StreamResponse] | None = None,
+ ) -> ResponseT | StreamResponse: ...
+
def get(
self,
path: str,
*,
cast_type: type[ResponseT],
options: UserRequestInput = {},
- enable_stream: bool = False,
- ) -> ResponseT | StreamResponse:
- opts = ClientRequestParam.construct(method="get", url=path, **options)
- return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream)
+ stream: bool = False,
+ stream_cls: type[StreamResponse] | None = None,
+ ) -> ResponseT:
+ opts = FinalRequestOptions.construct(method="get", url=path, **options)
+ return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls))
+
+ @overload
+ def post(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ body: Body | None = None,
+ options: UserRequestInput = {},
+ files: RequestFiles | None = None,
+ stream: Literal[False] = False,
+ ) -> ResponseT: ...
+
+ @overload
+ def post(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ body: Body | None = None,
+ options: UserRequestInput = {},
+ files: RequestFiles | None = None,
+ stream: Literal[True],
+ stream_cls: type[StreamResponse],
+ ) -> StreamResponse: ...
+
+ @overload
+ def post(
+ self,
+ path: str,
+ *,
+ cast_type: type[ResponseT],
+ body: Body | None = None,
+ options: UserRequestInput = {},
+ files: RequestFiles | None = None,
+ stream: bool,
+ stream_cls: type[StreamResponse] | None = None,
+ ) -> ResponseT | StreamResponse: ...
def post(
self,
path: str,
*,
- body: Body | None = None,
cast_type: type[ResponseT],
+ body: Body | None = None,
options: UserRequestInput = {},
files: RequestFiles | None = None,
- enable_stream: bool = False,
+ stream: bool = False,
stream_cls: type[StreamResponse[Any]] | None = None,
) -> ResponseT | StreamResponse:
- opts = ClientRequestParam.construct(
- method="post", json_data=body, files=make_httpx_files(files), url=path, **options
+ opts = FinalRequestOptions.construct(
+ method="post", url=path, json_data=body, files=to_httpx_files(files), **options
)
- return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream, stream_cls=stream_cls)
+ return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls))
def patch(
self,
path: str,
*,
- body: Body | None = None,
cast_type: type[ResponseT],
+ body: Body | None = None,
options: UserRequestInput = {},
) -> ResponseT:
- opts = ClientRequestParam.construct(method="patch", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
return self.request(
cast_type=cast_type,
- params=opts,
+ options=opts,
)
def put(
self,
path: str,
*,
- body: Body | None = None,
cast_type: type[ResponseT],
+ body: Body | None = None,
options: UserRequestInput = {},
files: RequestFiles | None = None,
) -> ResponseT | StreamResponse:
- opts = ClientRequestParam.construct(
- method="put", url=path, json_data=body, files=make_httpx_files(files), **options
+ opts = FinalRequestOptions.construct(
+ method="put", url=path, json_data=body, files=to_httpx_files(files), **options
)
return self.request(
cast_type=cast_type,
- params=opts,
+ options=opts,
)
def delete(
self,
path: str,
*,
- body: Body | None = None,
cast_type: type[ResponseT],
+ body: Body | None = None,
options: UserRequestInput = {},
) -> ResponseT | StreamResponse:
- opts = ClientRequestParam.construct(method="delete", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options)
return self.request(
cast_type=cast_type,
- params=opts,
+ options=opts,
)
+ def get_api_list(
+ self,
+ path: str,
+ *,
+ model: type[object],
+ page: type[SyncPageT],
+ body: Body | None = None,
+ options: UserRequestInput = {},
+ method: str = "get",
+ ) -> SyncPageT:
+ opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options)
+ return self._request_api_list(model, page, opts)
+
def _make_status_error(self, response) -> APIStatusError:
response_text = response.text.strip()
status_code = response.status_code
@@ -343,24 +865,46 @@ class HttpClient:
return APIStatusError(message=error_msg, response=response)
-def make_user_request_input(
- max_retries: int | None = None,
- timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
- extra_headers: Headers = None,
- extra_body: Body | None = None,
+def make_request_options(
+ *,
query: Query | None = None,
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ post_parser: PostParser | NotGiven = NOT_GIVEN,
) -> UserRequestInput:
+ """Create a dict of type RequestOptions without keys of NotGiven values."""
options: UserRequestInput = {}
-
if extra_headers is not None:
options["headers"] = extra_headers
- if max_retries is not None:
- options["max_retries"] = max_retries
- if not isinstance(timeout, NotGiven):
- options["timeout"] = timeout
- if query is not None:
- options["params"] = query
+
if extra_body is not None:
options["extra_json"] = cast(AnyMapping, extra_body)
+ if query is not None:
+ options["params"] = query
+
+ if extra_query is not None:
+ options["params"] = {**options.get("params", {}), **extra_query}
+
+ if not isinstance(timeout, NotGiven):
+ options["timeout"] = timeout
+
+ if is_given(post_parser):
+ # internal
+ options["post_parser"] = post_parser # type: ignore
+
return options
+
+
+def _merge_mappings(
+ obj1: Mapping[_T_co, Union[_T, Omit]],
+ obj2: Mapping[_T_co, Union[_T, Omit]],
+) -> dict[_T_co, _T]:
+ """Merge two mappings of the same type, removing any values that are instances of `Omit`.
+
+ In cases with duplicate keys the second mapping takes precedence.
+ """
+ merged = {**obj1, **obj2}
+ return {key: value for key, value in merged.items() if not isinstance(value, Omit)}
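For reference, a minimal standalone sketch of how `make_request_options` assembles the per-call options dict (simplified types; this mirrors the logic above rather than importing the SDK):

```py
from typing import Any


class NotGiven:
    """Simplified stand-in for the SDK's NotGiven sentinel ("argument not supplied")."""

    def __repr__(self) -> str:
        return "NOT_GIVEN"


NOT_GIVEN = NotGiven()


def make_request_options_sketch(
    *,
    query: dict[str, Any] | None = None,
    extra_headers: dict[str, str] | None = None,
    extra_query: dict[str, Any] | None = None,
    timeout: float | NotGiven = NOT_GIVEN,
) -> dict[str, Any]:
    options: dict[str, Any] = {}
    if extra_headers is not None:
        options["headers"] = extra_headers
    if query is not None:
        options["params"] = query
    if extra_query is not None:
        # extra_query is merged on top of query, matching the precedence above
        options["params"] = {**options.get("params", {}), **extra_query}
    if not isinstance(timeout, NotGiven):
        options["timeout"] = timeout
    return options


print(make_request_options_sketch(query={"page": 1}, extra_query={"per_page": 20}, timeout=30.0))
# {'params': {'page': 1, 'per_page': 20}, 'timeout': 30.0}
```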
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py
index b0a91d04a9..21f158a5f4 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py
@@ -3,9 +3,11 @@ import time
import cachetools.func
import jwt
-API_TOKEN_TTL_SECONDS = 3 * 60
+# Cache TTL: 3 minutes
+CACHE_TTL_SECONDS = 3 * 60
-CACHE_TTL_SECONDS = API_TOKEN_TTL_SECONDS - 30
+# The token stays valid 30 seconds longer than the cache TTL
+API_TOKEN_TTL_SECONDS = CACHE_TTL_SECONDS + 30
@cachetools.func.ttl_cache(maxsize=10, ttl=CACHE_TTL_SECONDS)
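The swap above makes the cache TTL the primary constant and derives the token TTL from it, so a cached token always outlives its cache entry by 30 seconds. A rough sketch of how the two constants work together (the payload claim names below are illustrative, not the SDK's exact claims):

```py
import time

import cachetools.func
import jwt  # PyJWT

CACHE_TTL_SECONDS = 3 * 60                      # how long a generated token is reused
API_TOKEN_TTL_SECONDS = CACHE_TTL_SECONDS + 30  # token validity outlives the cache entry


@cachetools.func.ttl_cache(maxsize=10, ttl=CACHE_TTL_SECONDS)
def generate_token_sketch(key_id: str, secret: str) -> str:
    now_ms = int(round(time.time() * 1000))
    payload = {
        "api_key": key_id,                             # illustrative claim names
        "exp": now_ms + API_TOKEN_TTL_SECONDS * 1000,
        "timestamp": now_ms,
    }
    return jwt.encode(payload, secret, algorithm="HS256")


token = generate_token_sketch("my-key-id", "my-secret")
assert token == generate_token_sketch("my-key-id", "my-secret")  # served from the TTL cache
```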
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py
new file mode 100644
index 0000000000..51623bd860
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py
@@ -0,0 +1,207 @@
+from __future__ import annotations
+
+import os
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+import anyio
+import httpx
+
+
+class HttpxResponseContent:
+ @property
+ def content(self) -> bytes:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ @property
+ def text(self) -> str:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ @property
+ def encoding(self) -> str | None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ @property
+ def charset_encoding(self) -> str | None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def json(self, **kwargs: Any) -> Any:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def read(self) -> bytes:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def iter_lines(self) -> Iterator[str]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def write_to_file(
+ self,
+ file: str | os.PathLike[str],
+ ) -> None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def stream_to_file(
+ self,
+ file: str | os.PathLike[str],
+ *,
+ chunk_size: int | None = None,
+ ) -> None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ def close(self) -> None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aread(self) -> bytes:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aiter_lines(self) -> AsyncIterator[str]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def astream_to_file(
+ self,
+ file: str | os.PathLike[str],
+ *,
+ chunk_size: int | None = None,
+ ) -> None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+ async def aclose(self) -> None:
+ raise NotImplementedError("This method is not implemented for this class.")
+
+
+class HttpxBinaryResponseContent(HttpxResponseContent):
+ response: httpx.Response
+
+ def __init__(self, response: httpx.Response) -> None:
+ self.response = response
+
+ @property
+ def content(self) -> bytes:
+ return self.response.content
+
+ @property
+ def encoding(self) -> str | None:
+ return self.response.encoding
+
+ @property
+ def charset_encoding(self) -> str | None:
+ return self.response.charset_encoding
+
+ def read(self) -> bytes:
+ return self.response.read()
+
+ @property
+ def text(self) -> str:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ def json(self, **kwargs: Any) -> Any:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ def iter_lines(self) -> Iterator[str]:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ async def aiter_lines(self) -> AsyncIterator[str]:
+ raise NotImplementedError("Not implemented for binary response content")
+
+ def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+ return self.response.iter_bytes(chunk_size)
+
+ def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]:
+ return self.response.iter_raw(chunk_size)
+
+ def write_to_file(
+ self,
+ file: str | os.PathLike[str],
+ ) -> None:
+ """Write the output to the given file.
+
+ Accepts a filename or any path-like object, e.g. pathlib.Path
+
+ Note: if you want to stream the data to the file instead of writing
+ all at once then you should use `.with_streaming_response` when making
+ the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')`
+ """
+ with open(file, mode="wb") as f:
+ for data in self.response.iter_bytes():
+ f.write(data)
+
+ def stream_to_file(
+ self,
+ file: str | os.PathLike[str],
+ *,
+ chunk_size: int | None = None,
+ ) -> None:
+ with open(file, mode="wb") as f:
+ for data in self.response.iter_bytes(chunk_size):
+ f.write(data)
+
+ def close(self) -> None:
+ return self.response.close()
+
+ async def aread(self) -> bytes:
+ return await self.response.aread()
+
+ async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+ return self.response.aiter_bytes(chunk_size)
+
+ async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+ return self.response.aiter_raw(chunk_size)
+
+ async def astream_to_file(
+ self,
+ file: str | os.PathLike[str],
+ *,
+ chunk_size: int | None = None,
+ ) -> None:
+ path = anyio.Path(file)
+ async with await path.open(mode="wb") as f:
+ async for data in self.response.aiter_bytes(chunk_size):
+ await f.write(data)
+
+ async def aclose(self) -> None:
+ return await self.response.aclose()
+
+
+class HttpxTextBinaryResponseContent(HttpxBinaryResponseContent):
+ response: httpx.Response
+
+ @property
+ def text(self) -> str:
+ return self.response.text
+
+ def json(self, **kwargs: Any) -> Any:
+ return self.response.json(**kwargs)
+
+ def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+ return self.response.iter_text(chunk_size)
+
+ def iter_lines(self) -> Iterator[str]:
+ return self.response.iter_lines()
+
+ async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+ return self.response.aiter_text(chunk_size)
+
+ async def aiter_lines(self) -> AsyncIterator[str]:
+ return self.response.aiter_lines()
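`HttpxBinaryResponseContent.stream_to_file` is a thin wrapper around the chunked-write pattern below; a self-contained illustration of that pattern using plain httpx (with a mock transport so it runs offline):

```py
import httpx


def handler(request: httpx.Request) -> httpx.Response:
    # Stand-in for a file-download endpoint
    return httpx.Response(200, content=b'{"id": 1}\n{"id": 2}\n')


transport = httpx.MockTransport(handler)
with httpx.Client(transport=transport) as client:
    with client.stream("GET", "https://example.test/files/result.jsonl") as response:
        with open("result.jsonl", "wb") as f:
            for chunk in response.iter_bytes(chunk_size=8192):
                f.write(chunk)
```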
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
new file mode 100644
index 0000000000..51bf21bcdc
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
@@ -0,0 +1,341 @@
+from __future__ import annotations
+
+import datetime
+import functools
+import inspect
+import logging
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload
+
+import httpx
+import pydantic
+from typing_extensions import ParamSpec, override
+
+from ._base_models import BaseModel, is_basemodel
+from ._base_type import NoneType
+from ._constants import RAW_RESPONSE_HEADER
+from ._errors import APIResponseValidationError
+from ._legacy_binary_response import HttpxResponseContent, HttpxTextBinaryResponseContent
+from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type
+from ._utils import extract_type_arg, is_annotated_type, is_given
+
+if TYPE_CHECKING:
+ from ._http_client import HttpClient
+ from ._request_opt import FinalRequestOptions
+
+P = ParamSpec("P")
+R = TypeVar("R")
+_T = TypeVar("_T")
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class LegacyAPIResponse(Generic[R]):
+ """This is a legacy class as it will be replaced by `APIResponse`
+ and `AsyncAPIResponse` in the `_response.py` file in the next major
+ release.
+
+ For the sync client this will mostly be the same with the exception
+ of `content` & `text` will be methods instead of properties. In the
+ async client, all methods will be async.
+
+ A migration script will be provided & the migration in general should
+ be smooth.
+ """
+
+ _cast_type: type[R]
+ _client: HttpClient
+ _parsed_by_type: dict[type[Any], Any]
+ _stream: bool
+ _stream_cls: type[StreamResponse[Any]] | None
+ _options: FinalRequestOptions
+
+ http_response: httpx.Response
+
+ def __init__(
+ self,
+ *,
+ raw: httpx.Response,
+ cast_type: type[R],
+ client: HttpClient,
+ stream: bool,
+ stream_cls: type[StreamResponse[Any]] | None,
+ options: FinalRequestOptions,
+ ) -> None:
+ self._cast_type = cast_type
+ self._client = client
+ self._parsed_by_type = {}
+ self._stream = stream
+ self._stream_cls = stream_cls
+ self._options = options
+ self.http_response = raw
+
+ @property
+ def request_id(self) -> str | None:
+ return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return]
+
+ @overload
+ def parse(self, *, to: type[_T]) -> _T: ...
+
+ @overload
+ def parse(self) -> R: ...
+
+ def parse(self, *, to: type[_T] | None = None) -> R | _T:
+ """Returns the rich python representation of this response's data.
+
+ NOTE: For the async client: this will become a coroutine in the next major version.
+
+ For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+ You can customize the type that the response is parsed into through
+ the `to` argument, e.g.
+
+ ```py
+ from zhipuai import BaseModel
+
+
+ class MyModel(BaseModel):
+ foo: str
+
+
+ obj = response.parse(to=MyModel)
+ print(obj.foo)
+ ```
+
+ We support parsing:
+ - `BaseModel`
+ - `dict`
+ - `list`
+ - `Union`
+ - `str`
+ - `int`
+ - `float`
+ - `httpx.Response`
+ """
+ cache_key = to if to is not None else self._cast_type
+ cached = self._parsed_by_type.get(cache_key)
+ if cached is not None:
+ return cached # type: ignore[no-any-return]
+
+ parsed = self._parse(to=to)
+ if is_given(self._options.post_parser):
+ parsed = self._options.post_parser(parsed)
+
+ self._parsed_by_type[cache_key] = parsed
+ return parsed
+
+ @property
+ def headers(self) -> httpx.Headers:
+ return self.http_response.headers
+
+ @property
+ def http_request(self) -> httpx.Request:
+ return self.http_response.request
+
+ @property
+ def status_code(self) -> int:
+ return self.http_response.status_code
+
+ @property
+ def url(self) -> httpx.URL:
+ return self.http_response.url
+
+ @property
+ def method(self) -> str:
+ return self.http_request.method
+
+ @property
+ def content(self) -> bytes:
+ """Return the binary response content.
+
+ NOTE: this will be removed in favour of `.read()` in the
+ next major version.
+ """
+ return self.http_response.content
+
+ @property
+ def text(self) -> str:
+ """Return the decoded response content.
+
+ NOTE: this will be turned into a method in the next major version.
+ """
+ return self.http_response.text
+
+ @property
+ def http_version(self) -> str:
+ return self.http_response.http_version
+
+ @property
+ def is_closed(self) -> bool:
+ return self.http_response.is_closed
+
+ @property
+ def elapsed(self) -> datetime.timedelta:
+ """The time taken for the complete request/response cycle to complete."""
+ return self.http_response.elapsed
+
+ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
+ # unwrap `Annotated[T, ...]` -> `T`
+ if to and is_annotated_type(to):
+ to = extract_type_arg(to, 0)
+
+ if self._stream:
+ if to:
+ if not is_stream_class_type(to):
+ raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}")
+
+ return cast(
+ _T,
+ to(
+ cast_type=extract_stream_chunk_type(
+ to,
+ failure_message="Expected custom stream type to be passed with a type argument, e.g. StreamResponse[ChunkType]", # noqa: E501
+ ),
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
+ )
+
+ if self._stream_cls:
+ return cast(
+ R,
+ self._stream_cls(
+ cast_type=extract_stream_chunk_type(self._stream_cls),
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
+ )
+
+ stream_cls = cast("type[StreamResponse[Any]] | None", self._client._default_stream_cls)
+ if stream_cls is None:
+ raise MissingStreamClassError()
+
+ return cast(
+ R,
+ stream_cls(
+ cast_type=self._cast_type,
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
+ )
+
+ cast_type = to if to is not None else self._cast_type
+
+ # unwrap `Annotated[T, ...]` -> `T`
+ if is_annotated_type(cast_type):
+ cast_type = extract_type_arg(cast_type, 0)
+
+ if cast_type is NoneType:
+ return cast(R, None)
+
+ response = self.http_response
+ if cast_type == str:
+ return cast(R, response.text)
+
+ if cast_type == int:
+ return cast(R, int(response.text))
+
+ if cast_type == float:
+ return cast(R, float(response.text))
+
+ origin = get_origin(cast_type) or cast_type
+
+ if inspect.isclass(origin) and issubclass(origin, HttpxResponseContent):
+ # Extract the filename from the Content-Disposition header (e.g. for file downloads)
+ *_, filename = response.headers.get("content-disposition", "").split("filename=")
+ # Return HttpxTextBinaryResponseContent for .jsonl and .xlsx downloads
+ if filename and (filename.endswith(".jsonl") or filename.endswith(".xlsx")):
+ return cast(R, HttpxTextBinaryResponseContent(response))
+ else:
+ return cast(R, cast_type(response)) # type: ignore
+
+ if origin == LegacyAPIResponse:
+ raise RuntimeError("Unexpected state - cast_type is `APIResponse`")
+
+ if inspect.isclass(origin) and issubclass(origin, httpx.Response):
+ # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
+ # and pass that class to our request functions. We cannot change the variance to be either
+ # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
+ # the response class ourselves but that is something that should be supported directly in httpx
+ # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
+ if cast_type != httpx.Response:
+ raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`")
+ return cast(R, response)
+
+ if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel):
+ raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`")
+
+ if (
+ cast_type is not object
+ and origin is not list
+ and origin is not dict
+ and origin is not Union
+ and not issubclass(origin, BaseModel)
+ ):
+ raise RuntimeError(
+ f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501
+ )
+
+ # split is required to handle cases where additional information is included
+ # in the response, e.g. application/json; charset=utf-8
+ content_type, *_ = response.headers.get("content-type", "*").split(";")
+ if content_type != "application/json":
+ if is_basemodel(cast_type):
+ try:
+ data = response.json()
+ except Exception as exc:
+ log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
+ else:
+ return self._client._process_response_data(
+ data=data,
+ cast_type=cast_type, # type: ignore
+ response=response,
+ )
+
+ if self._client._strict_response_validation:
+ raise APIResponseValidationError(
+ response=response,
+ message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501
+ json_data=response.text,
+ )
+
+ # If the API responds with content that isn't JSON then we just return
+ # the (decoded) text without performing any parsing so that you can still
+ # handle the response however you need to.
+ return response.text # type: ignore
+
+ data = response.json()
+
+ return self._client._process_response_data(
+ data=data,
+ cast_type=cast_type, # type: ignore
+ response=response,
+ )
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+
+class MissingStreamClassError(TypeError):
+ def __init__(self) -> None:
+ super().__init__(
+ "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501
+ )
+
+
+def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]:
+ """Higher order function that takes one of our bound API methods and wraps it
+ to support returning the raw `APIResponse` object directly.
+ """
+
+ @functools.wraps(func)
+ def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
+ extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+ extra_headers[RAW_RESPONSE_HEADER] = "true"
+
+ kwargs["extra_headers"] = extra_headers
+
+ return cast(LegacyAPIResponse[R], func(*args, **kwargs))
+
+ return wrapped
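`to_raw_response_wrapper` only injects a marker header; `_process_response` then returns a `LegacyAPIResponse` for the caller to parse. A standalone mimic of that mechanism (the header name here is a placeholder, not the SDK's actual `RAW_RESPONSE_HEADER` value):

```py
import functools
from collections.abc import Callable
from typing import Any

RAW_RESPONSE_HEADER = "x-example-raw-response"  # placeholder value for illustration


def raw_response_wrapper_sketch(func: Callable[..., Any]) -> Callable[..., Any]:
    @functools.wraps(func)
    def wrapped(*args: Any, **kwargs: Any) -> Any:
        extra_headers = {**(kwargs.get("extra_headers") or {})}
        extra_headers[RAW_RESPONSE_HEADER] = "true"
        kwargs["extra_headers"] = extra_headers
        return func(*args, **kwargs)

    return wrapped


@raw_response_wrapper_sketch
def fake_api_call(**kwargs: Any) -> dict[str, Any]:
    # A real bound API method would perform the HTTP request here
    return kwargs


print(fake_api_call())  # {'extra_headers': {'x-example-raw-response': 'true'}}
```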
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py
index ac459151fc..c3b894b3a3 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py
@@ -1,48 +1,97 @@
from __future__ import annotations
-from typing import Any, ClassVar, Union
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, ClassVar, Union, cast
+import pydantic.generics
from httpx import Timeout
-from pydantic import ConfigDict
-from typing_extensions import TypedDict, Unpack
+from typing_extensions import Required, TypedDict, Unpack, final
-from ._base_type import Body, Headers, HttpxRequestFiles, NotGiven, Query
-from ._utils import remove_notgiven_indict
+from ._base_compat import PYDANTIC_V2, ConfigDict
+from ._base_type import AnyMapping, Body, Headers, HttpxRequestFiles, NotGiven, Query
+from ._constants import RAW_RESPONSE_HEADER
+from ._utils import is_given, strip_not_given
class UserRequestInput(TypedDict, total=False):
+ headers: Headers
max_retries: int
timeout: float | Timeout | None
+ params: Query
+ extra_json: AnyMapping
+
+
+class FinalRequestOptionsInput(TypedDict, total=False):
+ method: Required[str]
+ url: Required[str]
+ params: Query
headers: Headers
- params: Query | None
+ max_retries: int
+ timeout: float | Timeout | None
+ files: HttpxRequestFiles | None
+ json_data: Body
+ extra_json: AnyMapping
-class ClientRequestParam:
+@final
+class FinalRequestOptions(pydantic.BaseModel):
method: str
url: str
- max_retries: Union[int, NotGiven] = NotGiven()
- timeout: Union[float, NotGiven] = NotGiven()
- headers: Union[Headers, NotGiven] = NotGiven()
- json_data: Union[Body, None] = None
- files: Union[HttpxRequestFiles, None] = None
params: Query = {}
- model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)
+ headers: Union[Headers, NotGiven] = NotGiven()
+ max_retries: Union[int, NotGiven] = NotGiven()
+ timeout: Union[float, Timeout, None, NotGiven] = NotGiven()
+ files: Union[HttpxRequestFiles, None] = None
+ idempotency_key: Union[str, None] = None
+ post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven()
- def get_max_retries(self, max_retries) -> int:
+ # It should be noted that we cannot use `json` here as that would override
+ # a BaseModel method in an incompatible fashion.
+ json_data: Union[Body, None] = None
+ extra_json: Union[AnyMapping, None] = None
+
+ if PYDANTIC_V2:
+ model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)
+ else:
+
+ class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated]
+ arbitrary_types_allowed: bool = True
+
+ def get_max_retries(self, max_retries: int) -> int:
if isinstance(self.max_retries, NotGiven):
return max_retries
return self.max_retries
+ def _strip_raw_response_header(self) -> None:
+ if not is_given(self.headers):
+ return
+
+ if self.headers.get(RAW_RESPONSE_HEADER):
+ self.headers = {**self.headers}
+ self.headers.pop(RAW_RESPONSE_HEADER)
+
+ # override the `construct` method so that we can run custom transformations.
+ # this is necessary as we don't want to do any actual runtime type checking
+ # (which means we can't use validators) but we do want to ensure that `NotGiven`
+ # values are not present
+ #
+ # type ignore required because we're adding explicit types to `**values`
@classmethod
def construct( # type: ignore
cls,
_fields_set: set[str] | None = None,
**values: Unpack[UserRequestInput],
- ) -> ClientRequestParam:
- kwargs: dict[str, Any] = {key: remove_notgiven_indict(value) for key, value in values.items()}
- client = cls()
- client.__dict__.update(kwargs)
+ ) -> FinalRequestOptions:
+ kwargs: dict[str, Any] = {
+ # we unconditionally call `strip_not_given` on any value
+ # as it will just ignore any non-mapping types
+ key: strip_not_given(value)
+ for key, value in values.items()
+ }
+ if PYDANTIC_V2:
+ return super().model_construct(_fields_set, **kwargs)
+ return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs)) # pyright: ignore[reportDeprecated]
- return client
-
- model_construct = construct
+ if not TYPE_CHECKING:
+ # type checkers incorrectly complain about this assignment
+ model_construct = construct
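`FinalRequestOptions.construct` relies on `strip_not_given` to drop `NotGiven` sentinels before building the model; a simplified standalone version of that behaviour:

```py
from collections.abc import Mapping
from typing import Any


class NotGiven:
    def __repr__(self) -> str:
        return "NOT_GIVEN"


NOT_GIVEN = NotGiven()


def strip_not_given_sketch(obj: Any) -> Any:
    """Drop NotGiven values from mappings; pass any other value through unchanged."""
    if not isinstance(obj, Mapping):
        return obj
    return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}


print(strip_not_given_sketch({"timeout": NOT_GIVEN, "params": {"page": 1}}))
# {'params': {'page': 1}}
print(strip_not_given_sketch("unchanged"))
# unchanged
```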
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
index 56e60a7934..92e6018055 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
@@ -1,87 +1,193 @@
from __future__ import annotations
import datetime
-from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast, get_args, get_origin
+import inspect
+import logging
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload
import httpx
import pydantic
-from typing_extensions import ParamSpec
+from typing_extensions import ParamSpec, override
+from ._base_models import BaseModel, is_basemodel
from ._base_type import NoneType
-from ._sse_client import StreamResponse
+from ._errors import APIResponseValidationError, ZhipuAIError
+from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type
+from ._utils import extract_type_arg, extract_type_var_from_base, is_annotated_type, is_given
if TYPE_CHECKING:
from ._http_client import HttpClient
+ from ._request_opt import FinalRequestOptions
P = ParamSpec("P")
R = TypeVar("R")
+_T = TypeVar("_T")
+_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]")
+log: logging.Logger = logging.getLogger(__name__)
-class HttpResponse(Generic[R]):
+class BaseAPIResponse(Generic[R]):
_cast_type: type[R]
_client: HttpClient
- _parsed: R | None
- _enable_stream: bool
+ _parsed_by_type: dict[type[Any], Any]
+ _is_sse_stream: bool
_stream_cls: type[StreamResponse[Any]]
+ _options: FinalRequestOptions
http_response: httpx.Response
def __init__(
self,
*,
- raw_response: httpx.Response,
+ raw: httpx.Response,
cast_type: type[R],
client: HttpClient,
- enable_stream: bool = False,
+ stream: bool,
stream_cls: type[StreamResponse[Any]] | None = None,
+ options: FinalRequestOptions,
) -> None:
self._cast_type = cast_type
self._client = client
- self._parsed = None
+ self._parsed_by_type = {}
+ self._is_sse_stream = stream
self._stream_cls = stream_cls
- self._enable_stream = enable_stream
- self.http_response = raw_response
+ self._options = options
+ self.http_response = raw
- def parse(self) -> R:
- self._parsed = self._parse()
- return self._parsed
+ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
+ # unwrap `Annotated[T, ...]` -> `T`
+ if to and is_annotated_type(to):
+ to = extract_type_arg(to, 0)
- def _parse(self) -> R:
- if self._enable_stream:
- self._parsed = cast(
- R,
- self._stream_cls(
- cast_type=cast(type, get_args(self._stream_cls)[0]),
- response=self.http_response,
- client=self._client,
- ),
- )
- return self._parsed
- cast_type = self._cast_type
- if cast_type is NoneType:
- return cast(R, None)
- http_response = self.http_response
- if cast_type == str:
- return cast(R, http_response.text)
+ if self._is_sse_stream:
+ if to:
+ if not is_stream_class_type(to):
+ raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}")
- content_type, *_ = http_response.headers.get("content-type", "application/json").split(";")
- origin = get_origin(cast_type) or cast_type
- if content_type != "application/json":
- if issubclass(origin, pydantic.BaseModel):
- data = http_response.json()
- return self._client._process_response_data(
- data=data,
- cast_type=cast_type, # type: ignore
- response=http_response,
+ return cast(
+ _T,
+ to(
+ cast_type=extract_stream_chunk_type(
+ to,
+ failure_message="Expected custom stream type to be passed with a type argument, e.g. StreamResponse[ChunkType]", # noqa: E501
+ ),
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
)
- return http_response.text
+ if self._stream_cls:
+ return cast(
+ R,
+ self._stream_cls(
+ cast_type=extract_stream_chunk_type(self._stream_cls),
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
+ )
- data = http_response.json()
+ stream_cls = cast("type[Stream[Any]] | None", self._client._default_stream_cls)
+ if stream_cls is None:
+ raise MissingStreamClassError()
+
+ return cast(
+ R,
+ stream_cls(
+ cast_type=self._cast_type,
+ response=self.http_response,
+ client=cast(Any, self._client),
+ ),
+ )
+
+ cast_type = to if to is not None else self._cast_type
+
+ # unwrap `Annotated[T, ...]` -> `T`
+ if is_annotated_type(cast_type):
+ cast_type = extract_type_arg(cast_type, 0)
+
+ if cast_type is NoneType:
+ return cast(R, None)
+
+ response = self.http_response
+ if cast_type == str:
+ return cast(R, response.text)
+
+ if cast_type == bytes:
+ return cast(R, response.content)
+
+ if cast_type == int:
+ return cast(R, int(response.text))
+
+ if cast_type == float:
+ return cast(R, float(response.text))
+
+ origin = get_origin(cast_type) or cast_type
+
+ # handle the legacy binary response case
+ if inspect.isclass(cast_type) and cast_type.__name__ == "HttpxBinaryResponseContent":
+ return cast(R, cast_type(response)) # type: ignore
+
+ if origin == APIResponse:
+ raise RuntimeError("Unexpected state - cast_type is `APIResponse`")
+
+ if inspect.isclass(origin) and issubclass(origin, httpx.Response):
+ # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
+ # and pass that class to our request functions. We cannot change the variance to be either
+ # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
+ # the response class ourselves but that is something that should be supported directly in httpx
+ # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
+ if cast_type != httpx.Response:
+ raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`")
+ return cast(R, response)
+
+ if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel):
+ raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`")
+
+ if (
+ cast_type is not object
+ and origin is not list
+ and origin is not dict
+ and origin is not Union
+ and not issubclass(origin, BaseModel)
+ ):
+ raise RuntimeError(
+ f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501
+ )
+
+ # split is required to handle cases where additional information is included
+ # in the response, e.g. application/json; charset=utf-8
+ content_type, *_ = response.headers.get("content-type", "*").split(";")
+ if content_type != "application/json":
+ if is_basemodel(cast_type):
+ try:
+ data = response.json()
+ except Exception as exc:
+ log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
+ else:
+ return self._client._process_response_data(
+ data=data,
+ cast_type=cast_type, # type: ignore
+ response=response,
+ )
+
+ if self._client._strict_response_validation:
+ raise APIResponseValidationError(
+ response=response,
+ message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501
+ json_data=response.text,
+ )
+
+ # If the API responds with content that isn't JSON then we just return
+ # the (decoded) text without performing any parsing so that you can still
+ # handle the response however you need to.
+ return response.text # type: ignore
+
+ data = response.json()
return self._client._process_response_data(
data=data,
cast_type=cast_type, # type: ignore
- response=http_response,
+ response=response,
)
@property
@@ -90,6 +196,7 @@ class HttpResponse(Generic[R]):
@property
def http_request(self) -> httpx.Request:
+ """Returns the httpx Request instance associated with the current response."""
return self.http_response.request
@property
@@ -98,24 +205,194 @@ class HttpResponse(Generic[R]):
@property
def url(self) -> httpx.URL:
+ """Returns the URL for which the request was made."""
return self.http_response.url
@property
def method(self) -> str:
return self.http_request.method
- @property
- def content(self) -> bytes:
- return self.http_response.content
-
- @property
- def text(self) -> str:
- return self.http_response.text
-
@property
def http_version(self) -> str:
return self.http_response.http_version
@property
def elapsed(self) -> datetime.timedelta:
+ """The time taken for the complete request/response cycle to complete."""
return self.http_response.elapsed
+
+ @property
+ def is_closed(self) -> bool:
+ """Whether or not the response body has been closed.
+
+ If this is False then there is response data that has not been read yet.
+ You must either fully consume the response body or call `.close()`
+ before discarding the response to prevent resource leaks.
+ """
+ return self.http_response.is_closed
+
+ @override
+ def __repr__(self) -> str:
+ return f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_type}>" # noqa: E501
+
+
+class APIResponse(BaseAPIResponse[R]):
+ @property
+ def request_id(self) -> str | None:
+ return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return]
+
+ @overload
+ def parse(self, *, to: type[_T]) -> _T: ...
+
+ @overload
+ def parse(self) -> R: ...
+
+ def parse(self, *, to: type[_T] | None = None) -> R | _T:
+ """Returns the rich python representation of this response's data.
+
+ For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+ You can customize the type that the response is parsed into through
+ the `to` argument, e.g.
+
+ ```py
+ from zhipuai import BaseModel
+
+
+ class MyModel(BaseModel):
+ foo: str
+
+
+ obj = response.parse(to=MyModel)
+ print(obj.foo)
+ ```
+
+ We support parsing:
+ - `BaseModel`
+ - `dict`
+ - `list`
+ - `Union`
+ - `str`
+ - `int`
+ - `float`
+ - `httpx.Response`
+ """
+ cache_key = to if to is not None else self._cast_type
+ cached = self._parsed_by_type.get(cache_key)
+ if cached is not None:
+ return cached # type: ignore[no-any-return]
+
+ if not self._is_sse_stream:
+ self.read()
+
+ parsed = self._parse(to=to)
+ if is_given(self._options.post_parser):
+ parsed = self._options.post_parser(parsed)
+
+ self._parsed_by_type[cache_key] = parsed
+ return parsed
+
+ def read(self) -> bytes:
+ """Read and return the binary response content."""
+ try:
+ return self.http_response.read()
+ except httpx.StreamConsumed as exc:
+ # The default error raised by httpx isn't very
+ # helpful in our case so we re-raise it with
+ # a different error message.
+ raise StreamAlreadyConsumed() from exc
+
+ def text(self) -> str:
+ """Read and decode the response content into a string."""
+ self.read()
+ return self.http_response.text
+
+ def json(self) -> object:
+ """Read and decode the JSON response content."""
+ self.read()
+ return self.http_response.json()
+
+ def close(self) -> None:
+ """Close the response and release the connection.
+
+ Automatically called if the response body is read to completion.
+ """
+ self.http_response.close()
+
+ def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+ """
+ A byte-iterator over the decoded response content.
+
+ This automatically handles gzip, deflate and brotli encoded responses.
+ """
+ yield from self.http_response.iter_bytes(chunk_size)
+
+ def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+ """A str-iterator over the decoded response content
+ that handles both gzip, deflate, etc but also detects the content's
+ string encoding.
+ """
+ yield from self.http_response.iter_text(chunk_size)
+
+ def iter_lines(self) -> Iterator[str]:
+ """Like `iter_text()` but will only yield chunks for each line"""
+ yield from self.http_response.iter_lines()
+
+
+class MissingStreamClassError(TypeError):
+ def __init__(self) -> None:
+ super().__init__(
+ "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501
+ )
+
+
+class StreamAlreadyConsumed(ZhipuAIError): # noqa: N818
+ """
+ Attempted to read or stream content, but the content has already
+ been streamed.
+
+ This can happen if you use a method like `.iter_lines()` and then attempt
+ to read the entire response body afterwards, e.g.
+
+ ```py
+ response = await client.post(...)
+ async for line in response.iter_lines():
+ ... # do something with `line`
+
+ content = await response.read()
+ # ^ error
+ ```
+
+ If you want this behavior you'll need to either manually accumulate the response
+ content or call `await response.read()` before iterating over the stream.
+ """
+
+ def __init__(self) -> None:
+ message = (
+ "Attempted to read or stream some content, but the content has "
+ "already been streamed. "
+ "This could be due to attempting to stream the response "
+ "content more than once."
+ "\n\n"
+ "You can fix this by manually accumulating the response content while streaming "
+ "or by calling `.read()` before starting to stream."
+ )
+ super().__init__(message)
+
+
+def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type:
+ """Given a type like `APIResponse[T]`, returns the generic type variable `T`.
+
+ This also handles the case where a concrete subclass is given, e.g.
+ ```py
+ class MyResponse(APIResponse[bytes]):
+ ...
+
+ extract_response_type(MyResponse) -> bytes
+ ```
+ """
+ return extract_type_var_from_base(
+ typ,
+ generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse)),
+ index=0,
+ )
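`extract_response_type` (and `extract_stream_chunk_type` in the SSE client) recover the generic argument from a parameterized base class; the core typing mechanics look roughly like this simplified sketch (the real `extract_type_var_from_base` handles more edge cases):

```py
from typing import Generic, TypeVar, get_args, get_origin

R = TypeVar("R")


class BaseAPIResponseSketch(Generic[R]):
    pass


class BytesResponse(BaseAPIResponseSketch[bytes]):
    pass


# A concrete subclass records its parameterized base in __orig_bases__,
# from which the generic argument can be read back out.
base = BytesResponse.__orig_bases__[0]
assert get_origin(base) is BaseAPIResponseSketch
assert get_args(base) == (bytes,)
print(get_args(base)[0])  # <class 'bytes'>
```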
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py
index ec2745d059..cbc449d244 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py
@@ -1,13 +1,16 @@
from __future__ import annotations
+import inspect
import json
from collections.abc import Iterator, Mapping
-from typing import TYPE_CHECKING, Generic
+from typing import TYPE_CHECKING, Generic, TypeGuard, cast
import httpx
+from . import get_origin
from ._base_type import ResponseT
from ._errors import APIResponseError
+from ._utils import extract_type_var_from_base, is_mapping
_FIELD_SEPARATOR = ":"
@@ -53,8 +56,41 @@ class StreamResponse(Generic[ResponseT]):
request=self.response.request,
json_data=data["error"],
)
+ if sse.event is None:
+ data = sse.json_data()
+ if is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+ raise APIResponseError(
+ message=message,
+ request=self.response.request,
+ json_data=data["error"],
+ )
yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response)
+
+ else:
+ data = sse.json_data()
+
+ if sse.event == "error" and is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIResponseError(
+ message=message,
+ request=self.response.request,
+ json_data=data["error"],
+ )
+ yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response)
+
for sse in iterator:
pass
@@ -138,3 +174,33 @@ class SSELineParser:
except (TypeError, ValueError):
pass
return
+
+
+def is_stream_class_type(typ: type) -> TypeGuard[type[StreamResponse[object]]]:
+ """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`"""
+ origin = get_origin(typ) or typ
+ return inspect.isclass(origin) and issubclass(origin, StreamResponse)
+
+
+def extract_stream_chunk_type(
+ stream_cls: type,
+ *,
+ failure_message: str | None = None,
+) -> type:
+ """Given a type like `StreamResponse[T]`, returns the generic type variable `T`.
+
+ This also handles the case where a concrete subclass is given, e.g.
+ ```py
+ class MyStream(StreamResponse[bytes]):
+ ...
+
+ extract_stream_chunk_type(MyStream) -> bytes
+ ```
+ """
+
+ return extract_type_var_from_base(
+ stream_cls,
+ index=0,
+ generic_bases=cast("tuple[type, ...]", (StreamResponse,)),
+ failure_message=failure_message,
+ )
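The error handling added above inspects the decoded `data:` payload of each SSE event for an `error` object before yielding a chunk; in isolation the check amounts to something like this sketch (field names mirror the code above, the raised exception type is simplified):

```py
import json

line = 'data: {"error": {"message": "Insufficient quota"}}'

field, _, value = line.partition(":")
assert field == "data"
data = json.loads(value.strip())

error = data.get("error")
message = error.get("message") if isinstance(error, dict) else None
if not message or not isinstance(message, str):
    message = "An error occurred during streaming"

print(message)  # the SDK raises APIResponseError(message=..., request=..., json_data=data["error"])
```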
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py
deleted file mode 100644
index 6b610567da..0000000000
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Iterable, Mapping
-from typing import TypeVar
-
-from ._base_type import NotGiven
-
-
-def remove_notgiven_indict(obj):
- if obj is None or (not isinstance(obj, Mapping)):
- return obj
- return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}
-
-
-_T = TypeVar("_T")
-
-
-def flatten(t: Iterable[Iterable[_T]]) -> list[_T]:
- return [item for sublist in t for item in sublist]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py
new file mode 100644
index 0000000000..a66b095816
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py
@@ -0,0 +1,52 @@
+from ._utils import ( # noqa: I001
+ remove_notgiven_indict as remove_notgiven_indict, # noqa: PLC0414
+ flatten as flatten, # noqa: PLC0414
+ is_dict as is_dict, # noqa: PLC0414
+ is_list as is_list, # noqa: PLC0414
+ is_given as is_given, # noqa: PLC0414
+ is_tuple as is_tuple, # noqa: PLC0414
+ is_mapping as is_mapping, # noqa: PLC0414
+ is_tuple_t as is_tuple_t, # noqa: PLC0414
+ parse_date as parse_date, # noqa: PLC0414
+ is_iterable as is_iterable, # noqa: PLC0414
+ is_sequence as is_sequence, # noqa: PLC0414
+ coerce_float as coerce_float, # noqa: PLC0414
+ is_mapping_t as is_mapping_t, # noqa: PLC0414
+ removeprefix as removeprefix, # noqa: PLC0414
+ removesuffix as removesuffix, # noqa: PLC0414
+ extract_files as extract_files, # noqa: PLC0414
+ is_sequence_t as is_sequence_t, # noqa: PLC0414
+ required_args as required_args, # noqa: PLC0414
+ coerce_boolean as coerce_boolean, # noqa: PLC0414
+ coerce_integer as coerce_integer, # noqa: PLC0414
+ file_from_path as file_from_path, # noqa: PLC0414
+ parse_datetime as parse_datetime, # noqa: PLC0414
+ strip_not_given as strip_not_given, # noqa: PLC0414
+ deepcopy_minimal as deepcopy_minimal, # noqa: PLC0414
+ get_async_library as get_async_library, # noqa: PLC0414
+ maybe_coerce_float as maybe_coerce_float, # noqa: PLC0414
+ get_required_header as get_required_header, # noqa: PLC0414
+ maybe_coerce_boolean as maybe_coerce_boolean, # noqa: PLC0414
+ maybe_coerce_integer as maybe_coerce_integer, # noqa: PLC0414
+ drop_prefix_image_data as drop_prefix_image_data, # noqa: PLC0414
+)
+
+
+from ._typing import (
+ is_list_type as is_list_type, # noqa: PLC0414
+ is_union_type as is_union_type, # noqa: PLC0414
+ extract_type_arg as extract_type_arg, # noqa: PLC0414
+ is_iterable_type as is_iterable_type, # noqa: PLC0414
+ is_required_type as is_required_type, # noqa: PLC0414
+ is_annotated_type as is_annotated_type, # noqa: PLC0414
+ strip_annotated_type as strip_annotated_type, # noqa: PLC0414
+ extract_type_var_from_base as extract_type_var_from_base, # noqa: PLC0414
+)
+
+from ._transform import (
+ PropertyInfo as PropertyInfo, # noqa: PLC0414
+ transform as transform, # noqa: PLC0414
+ async_transform as async_transform, # noqa: PLC0414
+ maybe_transform as maybe_transform, # noqa: PLC0414
+ async_maybe_transform as async_maybe_transform, # noqa: PLC0414
+)
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py
new file mode 100644
index 0000000000..e8ef1f7935
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py
@@ -0,0 +1,383 @@
+from __future__ import annotations
+
+import base64
+import io
+import pathlib
+from collections.abc import Mapping
+from datetime import date, datetime
+from typing import Any, Literal, TypeVar, cast, get_args, get_type_hints
+
+import anyio
+import pydantic
+from typing_extensions import override
+
+from .._base_compat import is_typeddict, model_dump
+from .._files import is_base64_file_input
+from ._typing import (
+ extract_type_arg,
+ is_annotated_type,
+ is_iterable_type,
+ is_list_type,
+ is_required_type,
+ is_union_type,
+ strip_annotated_type,
+)
+from ._utils import (
+ is_iterable,
+ is_list,
+ is_mapping,
+)
+
+_T = TypeVar("_T")
+
+
+# TODO: support for drilling globals() and locals()
+# TODO: ensure works correctly with forward references in all cases
+
+
+PropertyFormat = Literal["iso8601", "base64", "custom"]
+
+
+class PropertyInfo:
+ """Metadata class to be used in Annotated types to provide information about a given type.
+
+ For example:
+
+ class MyParams(TypedDict):
+ account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')]
+
+ This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API.
+ """ # noqa: E501
+
+ alias: str | None
+ format: PropertyFormat | None
+ format_template: str | None
+ discriminator: str | None
+
+ def __init__(
+ self,
+ *,
+ alias: str | None = None,
+ format: PropertyFormat | None = None,
+ format_template: str | None = None,
+ discriminator: str | None = None,
+ ) -> None:
+ self.alias = alias
+ self.format = format
+ self.format_template = format_template
+ self.discriminator = discriminator
+
+ @override
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" # noqa: E501
+
+
+def maybe_transform(
+ data: object,
+ expected_type: object,
+) -> Any | None:
+ """Wrapper over `transform()` that allows `None` to be passed.
+
+ See `transform()` for more details.
+ """
+ if data is None:
+ return None
+ return transform(data, expected_type)
+
+
+# Wrapper over _transform_recursive providing fake types
+def transform(
+ data: _T,
+ expected_type: object,
+) -> _T:
+ """Transform dictionaries based off of type information from the given type, for example:
+
+ ```py
+ class Params(TypedDict, total=False):
+ card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
+
+
+ transformed = transform({"card_id": ""}, Params)
+ # {'cardID': ''}
+ ```
+
+ Any keys / data that does not have type information given will be included as is.
+
+ It should be noted that the transformations that this function does are not represented in the type system.
+ """
+ transformed = _transform_recursive(data, annotation=cast(type, expected_type))
+ return cast(_T, transformed)
+
+
+def _get_annotated_type(type_: type) -> type | None:
+ """If the given type is an `Annotated` type then it is returned, if not `None` is returned.
+
+ This also unwraps the type when applicable, e.g. `Required[Annotated[T, ...]]`
+ """
+ if is_required_type(type_):
+ # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]`
+ type_ = get_args(type_)[0]
+
+ if is_annotated_type(type_):
+ return type_
+
+ return None
+
+
+def _maybe_transform_key(key: str, type_: type) -> str:
+ """Transform the given `data` based on the annotations provided in `type_`.
+
+ Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata.
+ """
+ annotated_type = _get_annotated_type(type_)
+ if annotated_type is None:
+ # no `Annotated` definition for this type, no transformation needed
+ return key
+
+ # ignore the first argument as it is the actual type
+ annotations = get_args(annotated_type)[1:]
+ for annotation in annotations:
+ if isinstance(annotation, PropertyInfo) and annotation.alias is not None:
+ return annotation.alias
+
+ return key
+
+
+def _transform_recursive(
+ data: object,
+ *,
+ annotation: type,
+ inner_type: type | None = None,
+) -> object:
+ """Transform the given data against the expected type.
+
+ Args:
+ annotation: The direct type annotation given to the particular piece of data.
+ This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
+
+ inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
+ is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
+ the list can be transformed using the metadata from the container type.
+
+ Defaults to the same value as the `annotation` argument.
+ """
+ if inner_type is None:
+ inner_type = annotation
+
+ stripped_type = strip_annotated_type(inner_type)
+ if is_typeddict(stripped_type) and is_mapping(data):
+ return _transform_typeddict(data, stripped_type)
+
+ if (
+ # List[T]
+ (is_list_type(stripped_type) and is_list(data))
+ # Iterable[T]
+ or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+ ):
+ inner_type = extract_type_arg(stripped_type, 0)
+ return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
+
+ if is_union_type(stripped_type):
+ # For union types we run the transformation against all subtypes to ensure that everything is transformed.
+ #
+ # TODO: there may be edge cases where the same normalized field name will transform to two different names
+ # in different subtypes.
+ for subtype in get_args(stripped_type):
+ data = _transform_recursive(data, annotation=annotation, inner_type=subtype)
+ return data
+
+ if isinstance(data, pydantic.BaseModel):
+ return model_dump(data, exclude_unset=True)
+
+ annotated_type = _get_annotated_type(annotation)
+ if annotated_type is None:
+ return data
+
+ # ignore the first argument as it is the actual type
+ annotations = get_args(annotated_type)[1:]
+ for annotation in annotations:
+ if isinstance(annotation, PropertyInfo) and annotation.format is not None:
+ return _format_data(data, annotation.format, annotation.format_template)
+
+ return data
+
+
+def _format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
+ if isinstance(data, date | datetime):
+ if format_ == "iso8601":
+ return data.isoformat()
+
+ if format_ == "custom" and format_template is not None:
+ return data.strftime(format_template)
+
+ if format_ == "base64" and is_base64_file_input(data):
+ binary: str | bytes | None = None
+
+ if isinstance(data, pathlib.Path):
+ binary = data.read_bytes()
+ elif isinstance(data, io.IOBase):
+ binary = data.read()
+
+ if isinstance(binary, str): # type: ignore[unreachable]
+ binary = binary.encode()
+
+ if not isinstance(binary, bytes):
+ raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
+
+ return base64.b64encode(binary).decode("ascii")
+
+ return data
+
+
+def _transform_typeddict(
+ data: Mapping[str, object],
+ expected_type: type,
+) -> Mapping[str, object]:
+ result: dict[str, object] = {}
+ annotations = get_type_hints(expected_type, include_extras=True)
+ for key, value in data.items():
+ type_ = annotations.get(key)
+ if type_ is None:
+ # we do not have a type annotation for this field, leave it as is
+ result[key] = value
+ else:
+ result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_)
+ return result
+
+
+async def async_maybe_transform(
+ data: object,
+ expected_type: object,
+) -> Any | None:
+ """Wrapper over `async_transform()` that allows `None` to be passed.
+
+ See `async_transform()` for more details.
+ """
+ if data is None:
+ return None
+ return await async_transform(data, expected_type)
+
+
+async def async_transform(
+ data: _T,
+ expected_type: object,
+) -> _T:
+ """Transform dictionaries based off of type information from the given type, for example:
+
+ ```py
+ class Params(TypedDict, total=False):
+ card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
+
+
+ transformed = await async_transform({"card_id": ""}, Params)
+ # {'cardID': ''}
+ ```
+
+ Any keys / data that does not have type information given will be included as is.
+
+ It should be noted that the transformations that this function does are not represented in the type system.
+ """
+ transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type))
+ return cast(_T, transformed)
+
+
+async def _async_transform_recursive(
+ data: object,
+ *,
+ annotation: type,
+ inner_type: type | None = None,
+) -> object:
+ """Transform the given data against the expected type.
+
+ Args:
+ annotation: The direct type annotation given to the particular piece of data.
+ This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
+
+ inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
+ is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
+ the list can be transformed using the metadata from the container type.
+
+ Defaults to the same value as the `annotation` argument.
+ """
+ if inner_type is None:
+ inner_type = annotation
+
+ stripped_type = strip_annotated_type(inner_type)
+ if is_typeddict(stripped_type) and is_mapping(data):
+ return await _async_transform_typeddict(data, stripped_type)
+
+ if (
+ # List[T]
+ (is_list_type(stripped_type) and is_list(data))
+ # Iterable[T]
+ or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+ ):
+ inner_type = extract_type_arg(stripped_type, 0)
+ return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
+
+ if is_union_type(stripped_type):
+ # For union types we run the transformation against all subtypes to ensure that everything is transformed.
+ #
+ # TODO: there may be edge cases where the same normalized field name will transform to two different names
+ # in different subtypes.
+ for subtype in get_args(stripped_type):
+ data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype)
+ return data
+
+ if isinstance(data, pydantic.BaseModel):
+ return model_dump(data, exclude_unset=True)
+
+ annotated_type = _get_annotated_type(annotation)
+ if annotated_type is None:
+ return data
+
+ # ignore the first argument as it is the actual type
+ annotations = get_args(annotated_type)[1:]
+ for annotation in annotations:
+ if isinstance(annotation, PropertyInfo) and annotation.format is not None:
+ return await _async_format_data(data, annotation.format, annotation.format_template)
+
+ return data
+
+
+async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
+ if isinstance(data, date | datetime):
+ if format_ == "iso8601":
+ return data.isoformat()
+
+ if format_ == "custom" and format_template is not None:
+ return data.strftime(format_template)
+
+ if format_ == "base64" and is_base64_file_input(data):
+ binary: str | bytes | None = None
+
+ if isinstance(data, pathlib.Path):
+ binary = await anyio.Path(data).read_bytes()
+ elif isinstance(data, io.IOBase):
+ binary = data.read()
+
+ if isinstance(binary, str): # type: ignore[unreachable]
+ binary = binary.encode()
+
+ if not isinstance(binary, bytes):
+ raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
+
+ return base64.b64encode(binary).decode("ascii")
+
+ return data
+
+
+async def _async_transform_typeddict(
+ data: Mapping[str, object],
+ expected_type: type,
+) -> Mapping[str, object]:
+ result: dict[str, object] = {}
+ annotations = get_type_hints(expected_type, include_extras=True)
+ for key, value in data.items():
+ type_ = annotations.get(key)
+ if type_ is None:
+ # we do not have a type annotation for this field, leave it as is
+ result[key] = value
+ else:
+ result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
+ return result
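The `PropertyInfo` metadata drives both key renaming and value formatting just before a request is serialized. Below is a minimal sketch, assuming `PropertyInfo` and `maybe_transform` are re-exported by the SDK's `core._utils` package added in this diff; adjust the import if the package is vendored elsewhere.

```python
# Sketch only: the import path below is assumed from the file layout in this diff.
from datetime import datetime
from typing import Annotated

from typing_extensions import Required, TypedDict

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils import (
    PropertyInfo,
    maybe_transform,
)


class CardParams(TypedDict, total=False):
    card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]  # sent as "cardID"
    issued_at: Annotated[datetime, PropertyInfo(format="iso8601")]  # serialized as ISO 8601


body = maybe_transform({"card_id": "abc-123", "issued_at": datetime(2024, 1, 1)}, CardParams)
# body == {"cardID": "abc-123", "issued_at": "2024-01-01T00:00:00"}
```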
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py
new file mode 100644
index 0000000000..c7c54dcc37
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from collections import abc as _c_abc
+from collections.abc import Iterable
+from typing import Annotated, Any, TypeVar, cast, get_args, get_origin
+
+from typing_extensions import Required
+
+from .._base_compat import is_union as _is_union
+from .._base_type import InheritsGeneric
+
+
+def is_annotated_type(typ: type) -> bool:
+ return get_origin(typ) == Annotated
+
+
+def is_list_type(typ: type) -> bool:
+ return (get_origin(typ) or typ) == list
+
+
+def is_iterable_type(typ: type) -> bool:
+ """If the given type is `typing.Iterable[T]`"""
+ origin = get_origin(typ) or typ
+ return origin in {Iterable, _c_abc.Iterable}
+
+
+def is_union_type(typ: type) -> bool:
+ return _is_union(get_origin(typ))
+
+
+def is_required_type(typ: type) -> bool:
+ return get_origin(typ) == Required
+
+
+def is_typevar(typ: type) -> bool:
+ # type ignore is required because type checkers
+ # think this expression will always return False
+ return type(typ) == TypeVar # type: ignore
+
+
+# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+def strip_annotated_type(typ: type) -> type:
+ if is_required_type(typ) or is_annotated_type(typ):
+ return strip_annotated_type(cast(type, get_args(typ)[0]))
+
+ return typ
+
+
+def extract_type_arg(typ: type, index: int) -> type:
+ args = get_args(typ)
+ try:
+ return cast(type, args[index])
+ except IndexError as err:
+ raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err
+
+
+def extract_type_var_from_base(
+ typ: type,
+ *,
+ generic_bases: tuple[type, ...],
+ index: int,
+ failure_message: str | None = None,
+) -> type:
+ """Given a type like `Foo[T]`, returns the generic type variable `T`.
+
+ This also handles the case where a concrete subclass is given, e.g.
+ ```py
+ class MyResponse(Foo[bytes]):
+ ...
+
+ extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes
+ ```
+
+ And where a generic subclass is given:
+ ```py
+ _T = TypeVar('_T')
+ class MyResponse(Foo[_T]):
+ ...
+
+ extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes
+ ```
+ """
+ cls = cast(object, get_origin(typ) or typ)
+ if cls in generic_bases:
+ # we're given the class directly
+ return extract_type_arg(typ, index)
+
+ # if a subclass is given
+ # ---
+ # this is needed as __orig_bases__ is not present in the typeshed stubs
+ # because it is intended to be for internal use only, however there does
+ # not seem to be a way to resolve generic TypeVars for inherited subclasses
+ # without using it.
+ if isinstance(cls, InheritsGeneric):
+ target_base_class: Any | None = None
+ for base in cls.__orig_bases__:
+ if base.__origin__ in generic_bases:
+ target_base_class = base
+ break
+
+ if target_base_class is None:
+ raise RuntimeError(
+ "Could not find the generic base class;\n"
+ "This should never happen;\n"
+ f"Does {cls} inherit from one of {generic_bases} ?"
+ )
+
+ extracted = extract_type_arg(target_base_class, index)
+ if is_typevar(extracted):
+ # If the extracted type argument is itself a type variable
+ # then that means the subclass itself is generic, so we have
+ # to resolve the type argument from the class itself, not
+ # the base class.
+ #
+ # Note: if there is more than 1 type argument, the subclass could
+ # change the ordering of the type arguments, this is not currently
+ # supported.
+ return extract_type_arg(typ, index)
+
+ return extracted
+
+ raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}")
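As a quick illustration of `extract_type_var_from_base`, the sketch below resolves the generic parameter both when the base is parameterized directly and via a concrete subclass. `Page` and `BytesPage` are hypothetical stand-ins; only the helper itself comes from the module above.

```python
# Hypothetical classes for illustration; import path assumed from this diff's file layout.
from typing import Generic, TypeVar

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._typing import (
    extract_type_var_from_base,
)

_T = TypeVar("_T")


class Page(Generic[_T]):
    """A stand-in generic container."""


class BytesPage(Page[bytes]):
    """A concrete subclass that pins the type argument."""


# Base class parameterized directly:
assert extract_type_var_from_base(Page[int], generic_bases=(Page,), index=0) is int
# Concrete subclass, resolved via __orig_bases__:
assert extract_type_var_from_base(BytesPage, generic_bases=(Page,), index=0) is bytes
```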
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py
new file mode 100644
index 0000000000..ce5e7786aa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py
@@ -0,0 +1,409 @@
+from __future__ import annotations
+
+import functools
+import inspect
+import os
+import re
+from collections.abc import Callable, Iterable, Mapping, Sequence
+from pathlib import Path
+from typing import (
+ Any,
+ TypeGuard,
+ TypeVar,
+ Union,
+ cast,
+ overload,
+)
+
+import sniffio
+
+from .._base_compat import parse_date as parse_date # noqa: PLC0414
+from .._base_compat import parse_datetime as parse_datetime # noqa: PLC0414
+from .._base_type import FileTypes, Headers, HeadersLike, NotGiven, NotGivenOr
+
+
+def remove_notgiven_indict(obj):
+ if obj is None or (not isinstance(obj, Mapping)):
+ return obj
+ return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}
+
+
+_T = TypeVar("_T")
+_TupleT = TypeVar("_TupleT", bound=tuple[object, ...])
+_MappingT = TypeVar("_MappingT", bound=Mapping[str, object])
+_SequenceT = TypeVar("_SequenceT", bound=Sequence[object])
+CallableT = TypeVar("CallableT", bound=Callable[..., Any])
+
+
+def flatten(t: Iterable[Iterable[_T]]) -> list[_T]:
+ return [item for sublist in t for item in sublist]
+
+
+def extract_files(
+ # TODO: this needs to take Dict but variance issues.....
+ # create protocol type ?
+ query: Mapping[str, object],
+ *,
+ paths: Sequence[Sequence[str]],
+) -> list[tuple[str, FileTypes]]:
+ """Recursively extract files from the given dictionary based on specified paths.
+
+ A path may look like this ['foo', 'files', '', 'data'].
+
+ Note: this mutates the given dictionary.
+ """
+ files: list[tuple[str, FileTypes]] = []
+ for path in paths:
+ files.extend(_extract_items(query, path, index=0, flattened_key=None))
+ return files
+
+
+def _extract_items(
+ obj: object,
+ path: Sequence[str],
+ *,
+ index: int,
+ flattened_key: str | None,
+) -> list[tuple[str, FileTypes]]:
+ try:
+ key = path[index]
+ except IndexError:
+ if isinstance(obj, NotGiven):
+ # no value was provided - we can safely ignore
+ return []
+
+ # cyclical import
+ from .._files import assert_is_file_content
+
+ # We have exhausted the path, return the entry we found.
+ assert_is_file_content(obj, key=flattened_key)
+ assert flattened_key is not None
+ return [(flattened_key, cast(FileTypes, obj))]
+
+ index += 1
+ if is_dict(obj):
+ try:
+ # We are at the last entry in the path so we must remove the field
+ if (len(path)) == index:
+ item = obj.pop(key)
+ else:
+ item = obj[key]
+ except KeyError:
+ # Key was not present in the dictionary, this is not indicative of an error
+ # as the given path may not point to a required field. We also do not want
+ # to enforce required fields as the API may differ from the spec in some cases.
+ return []
+ if flattened_key is None:
+ flattened_key = key
+ else:
+ flattened_key += f"[{key}]"
+ return _extract_items(
+ item,
+ path,
+ index=index,
+ flattened_key=flattened_key,
+ )
+ elif is_list(obj):
+ if key != "":
+ return []
+
+ return flatten(
+ [
+ _extract_items(
+ item,
+ path,
+ index=index,
+ flattened_key=flattened_key + "[]" if flattened_key is not None else "[]",
+ )
+ for item in obj
+ ]
+ )
+
+ # Something unexpected was passed, just ignore it.
+ return []
+
+
+def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]:
+ return not isinstance(obj, NotGiven)
+
+
+# Type safe methods for narrowing types with TypeVars.
+# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown],
+# however this causes Pyright to rightfully report errors. As we know we don't
+# care about the contained types we can safely use `object` in its place.
+#
+# There are two separate functions defined, `is_*` and `is_*_t` for different use cases.
+# `is_*` is for when you're dealing with an unknown input
+# `is_*_t` is for when you're narrowing a known union type to a specific subset
+
+
+def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]:
+ return isinstance(obj, tuple)
+
+
+def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]:
+ return isinstance(obj, tuple)
+
+
+def is_sequence(obj: object) -> TypeGuard[Sequence[object]]:
+ return isinstance(obj, Sequence)
+
+
+def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]:
+ return isinstance(obj, Sequence)
+
+
+def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]:
+ return isinstance(obj, Mapping)
+
+
+def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]:
+ return isinstance(obj, Mapping)
+
+
+def is_dict(obj: object) -> TypeGuard[dict[object, object]]:
+ return isinstance(obj, dict)
+
+
+def is_list(obj: object) -> TypeGuard[list[object]]:
+ return isinstance(obj, list)
+
+
+def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
+ return isinstance(obj, Iterable)
+
+
+def deepcopy_minimal(item: _T) -> _T:
+ """Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
+
+ - mappings, e.g. `dict`
+ - list
+
+ This is done for performance reasons.
+ """
+ if is_mapping(item):
+ return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
+ if is_list(item):
+ return cast(_T, [deepcopy_minimal(entry) for entry in item])
+ return item
+
+
+# copied from https://github.com/Rapptz/RoboDanny
+def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
+ size = len(seq)
+ if size == 0:
+ return ""
+
+ if size == 1:
+ return seq[0]
+
+ if size == 2:
+ return f"{seq[0]} {final} {seq[1]}"
+
+ return delim.join(seq[:-1]) + f" {final} {seq[-1]}"
+
+
+def quote(string: str) -> str:
+ """Add single quotation marks around the given string. Does *not* do any escaping."""
+ return f"'{string}'"
+
+
+def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]:
+ """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function.
+
+ Useful for enforcing runtime validation of overloaded functions.
+
+ Example usage:
+ ```py
+ @overload
+ def foo(*, a: str) -> str:
+ ...
+
+
+ @overload
+ def foo(*, b: bool) -> str:
+ ...
+
+
+ # This enforces the same constraints that a static type checker would
+ # i.e. that either a or b must be passed to the function
+ @required_args(["a"], ["b"])
+ def foo(*, a: str | None = None, b: bool | None = None) -> str:
+ ...
+ ```
+ """
+
+ def inner(func: CallableT) -> CallableT:
+ params = inspect.signature(func).parameters
+ positional = [
+ name
+ for name, param in params.items()
+ if param.kind
+ in {
+ param.POSITIONAL_ONLY,
+ param.POSITIONAL_OR_KEYWORD,
+ }
+ ]
+
+ @functools.wraps(func)
+ def wrapper(*args: object, **kwargs: object) -> object:
+ given_params: set[str] = set()
+ for i, _ in enumerate(args):
+ try:
+ given_params.add(positional[i])
+ except IndexError:
+ raise TypeError(
+ f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given"
+ ) from None
+
+ given_params.update(kwargs.keys())
+
+ for variant in variants:
+ matches = all(param in given_params for param in variant)
+ if matches:
+ break
+ else: # no break
+ if len(variants) > 1:
+ variations = human_join(
+ ["(" + human_join([quote(arg) for arg in variant], final="and") + ")" for variant in variants]
+ )
+ msg = f"Missing required arguments; Expected either {variations} arguments to be given"
+ else:
+ # TODO: this error message is not deterministic
+ missing = list(set(variants[0]) - given_params)
+ if len(missing) > 1:
+ msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}"
+ else:
+ msg = f"Missing required argument: {quote(missing[0])}"
+ raise TypeError(msg)
+ return func(*args, **kwargs)
+
+ return wrapper # type: ignore
+
+ return inner
+
+
+_K = TypeVar("_K")
+_V = TypeVar("_V")
+
+
+@overload
+def strip_not_given(obj: None) -> None: ...
+
+
+@overload
+def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ...
+
+
+@overload
+def strip_not_given(obj: object) -> object: ...
+
+
+def strip_not_given(obj: object | None) -> object:
+ """Remove all top-level keys where their values are instances of `NotGiven`"""
+ if obj is None:
+ return None
+
+ if not is_mapping(obj):
+ return obj
+
+ return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}
+
+
+def coerce_integer(val: str) -> int:
+ return int(val, base=10)
+
+
+def coerce_float(val: str) -> float:
+ return float(val)
+
+
+def coerce_boolean(val: str) -> bool:
+ return val in {"true", "1", "on"}
+
+
+def maybe_coerce_integer(val: str | None) -> int | None:
+ if val is None:
+ return None
+ return coerce_integer(val)
+
+
+def maybe_coerce_float(val: str | None) -> float | None:
+ if val is None:
+ return None
+ return coerce_float(val)
+
+
+def maybe_coerce_boolean(val: str | None) -> bool | None:
+ if val is None:
+ return None
+ return coerce_boolean(val)
+
+
+def removeprefix(string: str, prefix: str) -> str:
+ """Remove a prefix from a string.
+
+ Backport of `str.removeprefix` for Python < 3.9
+ """
+ if string.startswith(prefix):
+ return string[len(prefix) :]
+ return string
+
+
+def removesuffix(string: str, suffix: str) -> str:
+ """Remove a suffix from a string.
+
+ Backport of `str.removesuffix` for Python < 3.9
+ """
+ if string.endswith(suffix):
+ return string[: -len(suffix)]
+ return string
+
+
+def file_from_path(path: str) -> FileTypes:
+ contents = Path(path).read_bytes()
+ file_name = os.path.basename(path)
+ return (file_name, contents)
+
+
+def get_required_header(headers: HeadersLike, header: str) -> str:
+ lower_header = header.lower()
+ if isinstance(headers, Mapping):
+ headers = cast(Headers, headers)
+ for k, v in headers.items():
+ if k.lower() == lower_header and isinstance(v, str):
+ return v
+
+ """ to deal with the case where the header looks like Stainless-Event-Id """
+ intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
+
+ for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
+ value = headers.get(normalized_header)
+ if value:
+ return value
+
+ raise ValueError(f"Could not find {header} header")
+
+
+def get_async_library() -> str:
+ try:
+ return sniffio.current_async_library()
+ except Exception:
+ return "false"
+
+
+def drop_prefix_image_data(content: Union[str, list[dict]]) -> Union[str, list[dict]]:
+ """
+ 删除 ;base64, 前缀
+ :param image_data:
+ :return:
+ """
+ if isinstance(content, list):
+ for data in content:
+ if data.get("type") == "image_url":
+ image_data = data.get("image_url").get("url")
+ if image_data.startswith("data:image/"):
+ image_data = image_data.split("base64,")[-1]
+ data["image_url"]["url"] = image_data
+
+ return content
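For the multipart helpers above, a short sketch of how `extract_files` walks a params dict; the payload shape is made up for illustration, and the import path is assumed from this diff's file layout.

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._utils import (
    extract_files,
)

# Hypothetical payload; the path syntax matches extract_files' docstring above.
params = {
    "purpose": "retrieval",
    "files": [
        {"data": b"hello"},
        {"data": b"world"},
    ],
}

extracted = extract_files(params, paths=[["files", "", "data"]])
# extracted == [("files[][data]", b"hello"), ("files[][data]", b"world")]
# Note the mutation: the "data" keys have been popped from each list entry.
```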
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py
new file mode 100644
index 0000000000..e5fce94c00
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py
@@ -0,0 +1,78 @@
+import logging
+import os
+import time
+
+logger = logging.getLogger(__name__)
+
+
+class LoggerNameFilter(logging.Filter):
+ def filter(self, record):
+ # return record.name.startswith("loom_core") or record.name in "ERROR" or (
+ # record.name.startswith("uvicorn.error")
+ # and record.getMessage().startswith("Uvicorn running on")
+ # )
+ return True
+
+
+def get_log_file(log_path: str, sub_dir: str):
+ """
+ sub_dir should contain a timestamp.
+ """
+ log_dir = os.path.join(log_path, sub_dir)
+ # Here should be creating a new directory each time, so `exist_ok=False`
+ os.makedirs(log_dir, exist_ok=False)
+ return os.path.join(log_dir, "zhipuai.log")
+
+
+def get_config_dict(log_level: str, log_file_path: str, log_backup_count: int, log_max_bytes: int) -> dict:
+ # for windows, the path should be a raw string.
+ log_file_path = log_file_path.encode("unicode-escape").decode() if os.name == "nt" else log_file_path
+ log_level = log_level.upper()
+ config_dict = {
+ "version": 1,
+ "disable_existing_loggers": False,
+ "formatters": {
+ "formatter": {"format": ("%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s")},
+ },
+ "filters": {
+ "logger_name_filter": {
+ "()": __name__ + ".LoggerNameFilter",
+ },
+ },
+ "handlers": {
+ "stream_handler": {
+ "class": "logging.StreamHandler",
+ "formatter": "formatter",
+ "level": log_level,
+ # "stream": "ext://sys.stdout",
+ # "filters": ["logger_name_filter"],
+ },
+ "file_handler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "formatter": "formatter",
+ "level": log_level,
+ "filename": log_file_path,
+ "mode": "a",
+ "maxBytes": log_max_bytes,
+ "backupCount": log_backup_count,
+ "encoding": "utf8",
+ },
+ },
+ "loggers": {
+ "loom_core": {
+ "handlers": ["stream_handler", "file_handler"],
+ "level": log_level,
+ "propagate": False,
+ }
+ },
+ "root": {
+ "level": log_level,
+ "handlers": ["stream_handler", "file_handler"],
+ },
+ }
+ return config_dict
+
+
+def get_timestamp_ms():
+ t = time.time()
+ return int(round(t * 1000))
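A minimal sketch of wiring `get_log_file` and `get_config_dict` into the standard library's `logging.config`; the base path below is hypothetical, and `get_log_file` expects the sub-directory not to exist yet since it creates it with `exist_ok=False`.

```python
import logging
import logging.config
import tempfile

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core.logs import (
    get_config_dict,
    get_log_file,
    get_timestamp_ms,
)

# Hypothetical base path; any writable location works.
log_file = get_log_file(log_path=tempfile.gettempdir(), sub_dir=f"zhipuai_{get_timestamp_ms()}")

logging.config.dictConfig(
    get_config_dict(
        log_level="info",
        log_file_path=log_file,
        log_backup_count=3,
        log_max_bytes=10 * 1024 * 1024,
    )
)
logging.getLogger("loom_core").info("logging configured, writing to %s", log_file)
```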
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py
new file mode 100644
index 0000000000..7f0b1b91d9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py
@@ -0,0 +1,62 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Any, Generic, Optional, TypeVar, cast
+
+from typing_extensions import Protocol, override, runtime_checkable
+
+from ._http_client import BasePage, BaseSyncPage, PageInfo
+
+__all__ = ["SyncPage", "SyncCursorPage"]
+
+_T = TypeVar("_T")
+
+
+@runtime_checkable
+class CursorPageItem(Protocol):
+ id: Optional[str]
+
+
+class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
+ """Note: no pagination actually occurs yet, this is for forwards-compatibility."""
+
+ data: list[_T]
+ object: str
+
+ @override
+ def _get_page_items(self) -> list[_T]:
+ data = self.data
+ if not data:
+ return []
+ return data
+
+ @override
+ def next_page_info(self) -> None:
+ """
+ This page represents a response that isn't actually paginated at the API level
+ so there will never be a next page.
+ """
+ return None
+
+
+class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
+ data: list[_T]
+
+ @override
+ def _get_page_items(self) -> list[_T]:
+ data = self.data
+ if not data:
+ return []
+ return data
+
+ @override
+ def next_page_info(self) -> Optional[PageInfo]:
+ data = self.data
+ if not data:
+ return None
+
+ item = cast(Any, data[-1])
+ if not isinstance(item, CursorPageItem) or item.id is None:
+ # TODO emit warning log
+ return None
+
+ return PageInfo(params={"after": item.id})
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py
new file mode 100644
index 0000000000..9f941fb91c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py
@@ -0,0 +1,5 @@
+from .assistant_completion import AssistantCompletion
+
+__all__ = [
+ "AssistantCompletion",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py
new file mode 100644
index 0000000000..cbfb6edaeb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py
@@ -0,0 +1,40 @@
+from typing import Any, Optional
+
+from ...core import BaseModel
+from .message import MessageContent
+
+__all__ = ["AssistantCompletion", "CompletionUsage"]
+
+
+class ErrorInfo(BaseModel):
+ code: str # error code
+ message: str # error message
+
+
+class AssistantChoice(BaseModel):
+ index: int # result index
+ delta: MessageContent # output message body for the current conversation
+ finish_reason: str
+ """
+ Reason the generation finished: `stop` means generation ended naturally or hit a stop word;
+ `sensitive` means the output was intercepted by the content-safety review (users should decide
+ for themselves whether to retract any content already published); `network_error` means the
+ inference service failed.
+ """
+ metadata: dict # metadata, extension field
+
+
+class CompletionUsage(BaseModel):
+ prompt_tokens: int # number of input tokens
+ completion_tokens: int # number of output tokens
+ total_tokens: int # total number of tokens
+
+
+class AssistantCompletion(BaseModel):
+ id: str # request ID
+ conversation_id: str # conversation ID
+ assistant_id: str # assistant ID
+ created: int # request creation time, Unix timestamp
+ status: str # generation status: `completed` (finished), `in_progress` (generating), `failed` (error)
+ last_error: Optional[ErrorInfo] # error information
+ choices: list[AssistantChoice] # incrementally returned content
+ metadata: Optional[dict[str, Any]] # metadata, extension field
+ usage: Optional[CompletionUsage] # token usage statistics
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py
new file mode 100644
index 0000000000..03f14f4238
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py
@@ -0,0 +1,7 @@
+from typing import TypedDict
+
+
+class ConversationParameters(TypedDict, total=False):
+ assistant_id: str # assistant ID
+ page: int # current page number
+ page_size: int # page size
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py
new file mode 100644
index 0000000000..d1833d220a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py
@@ -0,0 +1,29 @@
+from ...core import BaseModel
+
+__all__ = ["ConversationUsageListResp"]
+
+
+class Usage(BaseModel):
+ prompt_tokens: int # number of tokens in the user input
+ completion_tokens: int # number of tokens in the model output
+ total_tokens: int # total number of tokens
+
+
+class ConversationUsage(BaseModel):
+ id: str # conversation id
+ assistant_id: str # assistant id
+ create_time: int # creation time
+ update_time: int # update time
+ usage: Usage # token usage for the conversation
+
+
+class ConversationUsageList(BaseModel):
+ assistant_id: str # assistant id
+ has_more: bool # whether more pages are available
+ conversation_list: list[ConversationUsage] # returned conversation usage records
+
+
+class ConversationUsageListResp(BaseModel):
+ code: int
+ msg: str
+ data: ConversationUsageList
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py
new file mode 100644
index 0000000000..2def1025cd
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py
@@ -0,0 +1,32 @@
+from typing import Optional, TypedDict, Union
+
+
+class AssistantAttachments:
+ file_id: str
+
+
+class MessageTextContent:
+ type: str # currently only type = text is supported
+ text: str
+
+
+MessageContent = Union[MessageTextContent]
+
+
+class ConversationMessage(TypedDict):
+ """会话消息体"""
+
+ role: str # 用户的输入角色,例如 'user'
+ content: list[MessageContent] # 会话消息体的内容
+
+
+class AssistantParameters(TypedDict, total=False):
+ """智能体参数类"""
+
+ assistant_id: str # 智能体 ID
+ conversation_id: Optional[str] # 会话 ID,不传则创建新会话
+ model: str # 模型名称,默认为 'GLM-4-Assistant'
+ stream: bool # 是否支持流式 SSE,需要传入 True
+ messages: list[ConversationMessage] # 会话消息体
+ attachments: Optional[list[AssistantAttachments]] # 会话指定的文件,非必填
+ metadata: Optional[dict] # 元信息,拓展字段,非必填
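For reference, a hedged sketch of a payload shaped like `AssistantParameters`; the assistant ID is a placeholder, and the client method that consumes this dict is not part of this diff.

```python
# Placeholder values; the shape mirrors AssistantParameters / ConversationMessage above.
assistant_params = {
    "assistant_id": "asst_xxxxxxxx",  # placeholder assistant ID
    "model": "GLM-4-Assistant",       # default per the field comment above
    "stream": True,                   # SSE streaming must be enabled
    "messages": [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Summarize the attached file."}],
        }
    ],
}
```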
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py
new file mode 100644
index 0000000000..0709cdbcad
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py
@@ -0,0 +1,21 @@
+from ...core import BaseModel
+
+__all__ = ["AssistantSupportResp"]
+
+
+class AssistantSupport(BaseModel):
+ assistant_id: str # Assistant id of the agent, used for assistant conversations
+ created_at: int # creation time
+ updated_at: int # update time
+ name: str # assistant name
+ avatar: str # assistant avatar
+ description: str # assistant description
+ status: str # assistant status, currently only publish
+ tools: list[str] # names of tools the assistant supports
+ starter_prompts: list[str] # recommended starter prompts for the assistant
+
+
+class AssistantSupportResp(BaseModel):
+ code: int
+ msg: str
+ data: list[AssistantSupport] # list of assistants
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py
new file mode 100644
index 0000000000..562e0151e5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py
@@ -0,0 +1,3 @@
+from .message_content import MessageContent
+
+__all__ = ["MessageContent"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py
new file mode 100644
index 0000000000..6a1a438a6f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py
@@ -0,0 +1,13 @@
+from typing import Annotated, TypeAlias, Union
+
+from ....core._utils import PropertyInfo
+from .text_content_block import TextContentBlock
+from .tools_delta_block import ToolsDeltaBlock
+
+__all__ = ["MessageContent"]
+
+
+MessageContent: TypeAlias = Annotated[
+ Union[ToolsDeltaBlock, TextContentBlock],
+ PropertyInfo(discriminator="type"),
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py
new file mode 100644
index 0000000000..865fb1139e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py
@@ -0,0 +1,14 @@
+from typing import Literal
+
+from ....core import BaseModel
+
+__all__ = ["TextContentBlock"]
+
+
+class TextContentBlock(BaseModel):
+ content: str
+
+ role: str = "assistant"
+
+ type: Literal["content"] = "content"
+ """Always `content`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py
new file mode 100644
index 0000000000..9d569b282e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py
@@ -0,0 +1,27 @@
+from typing import Literal
+
+__all__ = ["CodeInterpreterToolBlock"]
+
+from .....core import BaseModel
+
+
+class CodeInterpreterToolOutput(BaseModel):
+ """代码工具输出结果"""
+
+ type: str # 代码执行日志,目前只有 logs
+ logs: str # 代码执行的日志结果
+ error_msg: str # 错误信息
+
+
+class CodeInterpreter(BaseModel):
+ """代码解释器"""
+
+ input: str # 生成的代码片段,输入给代码沙盒
+ outputs: list[CodeInterpreterToolOutput] # 代码执行后的输出结果
+
+
+class CodeInterpreterToolBlock(BaseModel):
+ """代码工具块"""
+
+ code_interpreter: CodeInterpreter # 代码解释器对象
+ type: Literal["code_interpreter"] # 调用工具的类型,始终为 `code_interpreter`
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py
new file mode 100644
index 0000000000..0b6895556b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py
@@ -0,0 +1,21 @@
+from typing import Literal
+
+from .....core import BaseModel
+
+__all__ = ["DrawingToolBlock"]
+
+
+class DrawingToolOutput(BaseModel):
+ image: str
+
+
+class DrawingTool(BaseModel):
+ input: str
+ outputs: list[DrawingToolOutput]
+
+
+class DrawingToolBlock(BaseModel):
+ drawing_tool: DrawingTool
+
+ type: Literal["drawing_tool"]
+ """Always `drawing_tool`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py
new file mode 100644
index 0000000000..c439bc4b3f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py
@@ -0,0 +1,22 @@
+from typing import Literal, Union
+
+__all__ = ["FunctionToolBlock"]
+
+from .....core import BaseModel
+
+
+class FunctionToolOutput(BaseModel):
+ content: str
+
+
+class FunctionTool(BaseModel):
+ name: str
+ arguments: Union[str, dict]
+ outputs: list[FunctionToolOutput]
+
+
+class FunctionToolBlock(BaseModel):
+ function: FunctionTool
+
+ type: Literal["function"]
+ """Always `drawing_tool`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py
new file mode 100644
index 0000000000..4789e9378a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py
@@ -0,0 +1,41 @@
+from typing import Literal
+
+from .....core import BaseModel
+
+
+class RetrievalToolOutput(BaseModel):
+ """
+ This class represents the output of a retrieval tool.
+
+ Attributes:
+ - text (str): The text snippet retrieved from the knowledge base.
+ - document (str): The name of the document from which the text snippet was retrieved, returned only in intelligent configuration.
+ """ # noqa: E501
+
+ text: str
+ document: str
+
+
+class RetrievalTool(BaseModel):
+ """
+ This class represents the outputs of a retrieval tool.
+
+ Attributes:
+ - outputs (List[RetrievalToolOutput]): A list of text snippets and their respective document names retrieved from the knowledge base.
+ """ # noqa: E501
+
+ outputs: list[RetrievalToolOutput]
+
+
+class RetrievalToolBlock(BaseModel):
+ """
+ This class represents a block for invoking the retrieval tool.
+
+ Attributes:
+ - retrieval (RetrievalTool): An instance of the RetrievalTool class containing the retrieval outputs.
+ - type (Literal["retrieval"]): The type of tool being used, always set to "retrieval".
+ """
+
+ retrieval: RetrievalTool
+ type: Literal["retrieval"]
+ """Always `retrieval`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py
new file mode 100644
index 0000000000..98544053d4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py
@@ -0,0 +1,16 @@
+from typing import Annotated, TypeAlias, Union
+
+from .....core._utils import PropertyInfo
+from .code_interpreter_delta_block import CodeInterpreterToolBlock
+from .drawing_tool_delta_block import DrawingToolBlock
+from .function_delta_block import FunctionToolBlock
+from .retrieval_delta_black import RetrievalToolBlock
+from .web_browser_delta_block import WebBrowserToolBlock
+
+__all__ = ["ToolsType"]
+
+
+ToolsType: TypeAlias = Annotated[
+ Union[DrawingToolBlock, CodeInterpreterToolBlock, WebBrowserToolBlock, RetrievalToolBlock, FunctionToolBlock],
+ PropertyInfo(discriminator="type"),
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py
new file mode 100644
index 0000000000..966e6fe0c8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py
@@ -0,0 +1,48 @@
+from typing import Literal
+
+from .....core import BaseModel
+
+__all__ = ["WebBrowserToolBlock"]
+
+
+class WebBrowserOutput(BaseModel):
+ """
+ This class represents the output of a web browser search result.
+
+ Attributes:
+ - title (str): The title of the search result.
+ - link (str): The URL link to the search result's webpage.
+ - content (str): The textual content extracted from the search result.
+ - error_msg (str): Any error message encountered during the search or retrieval process.
+ """
+
+ title: str
+ link: str
+ content: str
+ error_msg: str
+
+
+class WebBrowser(BaseModel):
+ """
+ This class represents the input and outputs of a web browser search.
+
+ Attributes:
+ - input (str): The input query for the web browser search.
+ - outputs (List[WebBrowserOutput]): A list of search results returned by the web browser.
+ """
+
+ input: str
+ outputs: list[WebBrowserOutput]
+
+
+class WebBrowserToolBlock(BaseModel):
+ """
+ This class represents a block for invoking the web browser tool.
+
+ Attributes:
+ - web_browser (WebBrowser): An instance of the WebBrowser class containing the search input and outputs.
+ - type (Literal["web_browser"]): The type of tool being used, always set to "web_browser".
+ """
+
+ web_browser: WebBrowser
+ type: Literal["web_browser"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py
new file mode 100644
index 0000000000..781a1ab819
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py
@@ -0,0 +1,16 @@
+from typing import Literal
+
+from ....core import BaseModel
+from .tools.tools_type import ToolsType
+
+__all__ = ["ToolsDeltaBlock"]
+
+
+class ToolsDeltaBlock(BaseModel):
+ tool_calls: list[ToolsType]
+ """The index of the content part in the message."""
+
+ role: str = "tool"
+
+ type: Literal["tool_calls"] = "tool_calls"
+ """Always `tool_calls`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py
new file mode 100644
index 0000000000..560562915c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py
@@ -0,0 +1,82 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import Literal, Optional
+
+from ..core import BaseModel
+from .batch_error import BatchError
+from .batch_request_counts import BatchRequestCounts
+
+__all__ = ["Batch", "Errors"]
+
+
+class Errors(BaseModel):
+ data: Optional[list[BatchError]] = None
+
+ object: Optional[str] = None
+ """这个类型,一直是`list`。"""
+
+
+class Batch(BaseModel):
+ id: str
+
+ completion_window: str
+ """用于执行请求的地址信息。"""
+
+ created_at: int
+ """这是 Unix timestamp (in seconds) 表示的创建时间。"""
+
+ endpoint: str
+ """这是ZhipuAI endpoint的地址。"""
+
+ input_file_id: str
+ """标记为batch的输入文件的ID。"""
+
+ object: Literal["batch"]
+ """这个类型,一直是`batch`."""
+
+ status: Literal[
+ "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled"
+ ]
+ """batch 的状态。"""
+
+ cancelled_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的取消时间。"""
+
+ cancelling_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示发起取消的请求时间 """
+
+ completed_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的完成时间。"""
+
+ error_file_id: Optional[str] = None
+ """这个文件id包含了执行请求失败的请求的输出。"""
+
+ errors: Optional[Errors] = None
+
+ expired_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的将在过期时间。"""
+
+ expires_at: Optional[int] = None
+ """Unix timestamp (in seconds) 触发过期"""
+
+ failed_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的失败时间。"""
+
+ finalizing_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的最终时间。"""
+
+ in_progress_at: Optional[int] = None
+ """Unix timestamp (in seconds) 表示的开始处理时间。"""
+
+ metadata: Optional[builtins.object] = None
+ """
+ key:value形式的元数据,以便将信息存储
+ 结构化格式。键的长度是64个字符,值最长512个字符
+ """
+
+ output_file_id: Optional[str] = None
+ """完成请求的输出文件的ID。"""
+
+ request_counts: Optional[BatchRequestCounts] = None
+ """批次中不同状态的请求计数"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py
new file mode 100644
index 0000000000..3dae65ea46
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import Literal, Optional
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["BatchCreateParams"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+ completion_window: Required[str]
+ """The time frame within which the batch should be processed.
+
+ Currently only `24h` is supported.
+ """
+
+ endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings"]]
+ """The endpoint to be used for all requests in the batch.
+
+ Currently `/v1/chat/completions` and `/v1/embeddings` are supported.
+ """
+
+ input_file_id: Required[str]
+ """The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a
+ [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput),
+ and must be uploaded with the purpose `batch`.
+ """
+
+ metadata: Optional[dict[str, str]]
+ """Optional custom metadata for the batch."""
+
+ auto_delete_input_file: Optional[bool]
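To make the request shape concrete, here is a hedged sketch of a dict shaped like `BatchCreateParams`; the input file ID is a placeholder for a previously uploaded JSONL file with purpose `batch`.

```python
# Placeholder IDs; the shape mirrors BatchCreateParams above.
batch_params = {
    "input_file_id": "file-xxxxxxxx",       # placeholder ID of an uploaded JSONL file
    "endpoint": "/v1/chat/completions",
    "completion_window": "24h",             # currently the only supported window
    "metadata": {"job": "nightly-eval"},    # optional free-form labels
}
```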
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py
new file mode 100644
index 0000000000..f934db1978
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..core import BaseModel
+
+__all__ = ["BatchError"]
+
+
+class BatchError(BaseModel):
+ code: Optional[str] = None
+ """定义的业务错误码"""
+
+ line: Optional[int] = None
+ """文件中的行号"""
+
+ message: Optional[str] = None
+ """关于对话文件中的错误的描述"""
+
+ param: Optional[str] = None
+ """参数名称,如果有的话"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py
new file mode 100644
index 0000000000..1a68167132
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["BatchListParams"]
+
+
+class BatchListParams(TypedDict, total=False):
+ after: str
+ """分页的游标,用于获取下一页的数据。
+
+ `after` 是一个指向当前页面的游标,用于获取下一页的数据。如果没有提供 `after`,则返回第一页的数据。
+ list.
+ """
+
+ limit: int
+ """这个参数用于限制返回的结果数量。
+
+ Limit 用于限制返回的结果数量。默认值为 10
+ """
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py
new file mode 100644
index 0000000000..ca3ccae625
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py
@@ -0,0 +1,14 @@
+from ..core import BaseModel
+
+__all__ = ["BatchRequestCounts"]
+
+
+class BatchRequestCounts(BaseModel):
+ completed: int
+ """这个数字表示已经完成的请求。"""
+
+ failed: int
+ """这个数字表示失败的请求。"""
+
+ total: int
+ """这个数字表示总的请求。"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py
index a0645b0916..c1eed070f3 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py
@@ -1,10 +1,9 @@
from typing import Optional
-from pydantic import BaseModel
-
+from ...core import BaseModel
from .chat_completion import CompletionChoice, CompletionUsage
-__all__ = ["AsyncTaskStatus"]
+__all__ = ["AsyncTaskStatus", "AsyncCompletion"]
class AsyncTaskStatus(BaseModel):
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py
index 4b3a929a2b..1945a826cd 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py
@@ -1,6 +1,6 @@
from typing import Optional
-from pydantic import BaseModel
+from ...core import BaseModel
__all__ = ["Completion", "CompletionUsage"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py
index c250699741..27fad0008a 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py
@@ -1,8 +1,9 @@
-from typing import Optional
+from typing import Any, Optional
-from pydantic import BaseModel
+from ...core import BaseModel
__all__ = [
+ "CompletionUsage",
"ChatCompletionChunk",
"Choice",
"ChoiceDelta",
@@ -53,3 +54,4 @@ class ChatCompletionChunk(BaseModel):
created: Optional[int] = None
model: Optional[str] = None
usage: Optional[CompletionUsage] = None
+ extra_json: dict[str, Any]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py
new file mode 100644
index 0000000000..666b38855c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py
@@ -0,0 +1,146 @@
+from typing import Literal, Optional
+
+from typing_extensions import Required, TypedDict
+
+__all__ = [
+ "CodeGeexTarget",
+ "CodeGeexContext",
+ "CodeGeexExtra",
+]
+
+
+class CodeGeexTarget(TypedDict, total=False):
+ """补全的内容参数"""
+
+ path: Optional[str]
+ """文件路径"""
+ language: Required[
+ Literal[
+ "c",
+ "c++",
+ "cpp",
+ "c#",
+ "csharp",
+ "c-sharp",
+ "css",
+ "cuda",
+ "dart",
+ "lua",
+ "objectivec",
+ "objective-c",
+ "objective-c++",
+ "python",
+ "perl",
+ "prolog",
+ "swift",
+ "lisp",
+ "java",
+ "scala",
+ "tex",
+ "jsx",
+ "tsx",
+ "vue",
+ "markdown",
+ "html",
+ "php",
+ "js",
+ "javascript",
+ "typescript",
+ "go",
+ "shell",
+ "rust",
+ "sql",
+ "kotlin",
+ "vb",
+ "ruby",
+ "pascal",
+ "r",
+ "fortran",
+ "lean",
+ "matlab",
+ "delphi",
+ "scheme",
+ "basic",
+ "assembly",
+ "groovy",
+ "abap",
+ "gdscript",
+ "haskell",
+ "julia",
+ "elixir",
+ "excel",
+ "clojure",
+ "actionscript",
+ "solidity",
+ "powershell",
+ "erlang",
+ "cobol",
+ "alloy",
+ "awk",
+ "thrift",
+ "sparql",
+ "augeas",
+ "cmake",
+ "f-sharp",
+ "stan",
+ "isabelle",
+ "dockerfile",
+ "rmarkdown",
+ "literate-agda",
+ "tcl",
+ "glsl",
+ "antlr",
+ "verilog",
+ "racket",
+ "standard-ml",
+ "elm",
+ "yaml",
+ "smalltalk",
+ "ocaml",
+ "idris",
+ "visual-basic",
+ "protocol-buffer",
+ "bluespec",
+ "applescript",
+ "makefile",
+ "tcsh",
+ "maple",
+ "systemverilog",
+ "literate-coffeescript",
+ "vhdl",
+ "restructuredtext",
+ "sas",
+ "literate-haskell",
+ "java-server-pages",
+ "coffeescript",
+ "emacs-lisp",
+ "mathematica",
+ "xslt",
+ "zig",
+ "common-lisp",
+ "stata",
+ "agda",
+ "ada",
+ ]
+ ]
+ """代码语言类型,如python"""
+ code_prefix: Required[str]
+ """补全位置的前文"""
+ code_suffix: Required[str]
+ """补全位置的后文"""
+
+
+class CodeGeexContext(TypedDict, total=False):
+ """附加代码"""
+
+ path: Required[str]
+ """附加代码文件的路径"""
+ code: Required[str]
+ """附加的代码内容"""
+
+
+class CodeGeexExtra(TypedDict, total=False):
+ target: Required[CodeGeexTarget]
+ """补全的内容参数"""
+ contexts: Optional[list[CodeGeexContext]]
+ """附加代码"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py
index e01f2c815f..8425b5c866 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py
@@ -2,8 +2,7 @@ from __future__ import annotations
from typing import Optional
-from pydantic import BaseModel
-
+from ..core import BaseModel
from .chat.chat_completion import CompletionUsage
__all__ = ["Embedding", "EmbeddingsResponded"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py
new file mode 100644
index 0000000000..bbaf59e4d7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py
@@ -0,0 +1,5 @@
+from .file_deleted import FileDeleted
+from .file_object import FileObject, ListOfFileObject
+from .upload_detail import UploadDetail
+
+__all__ = ["FileObject", "ListOfFileObject", "UploadDetail", "FileDeleted"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py
new file mode 100644
index 0000000000..4ef93b1c05
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import Literal, Optional
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["FileCreateParams"]
+
+from ...core import FileTypes
+from . import UploadDetail
+
+
+class FileCreateParams(TypedDict, total=False):
+ file: FileTypes
+ """file和 upload_detail二选一必填"""
+
+ upload_detail: list[UploadDetail]
+ """file和 upload_detail二选一必填"""
+
+ purpose: Required[Literal["fine-tune", "retrieval", "batch"]]
+ """
+ 上传文件的用途,支持 "fine-tune和 "retrieval"
+ retrieval支持上传Doc、Docx、PDF、Xlsx、URL类型文件,且单个文件的大小不超过 5MB。
+ fine-tune支持上传.jsonl文件且当前单个文件的大小最大可为 100 MB ,文件中语料格式需满足微调指南中所描述的格式。
+ """
+ custom_separator: Optional[list[str]]
+ """
+ 当 purpose 为 retrieval 且文件类型为 pdf, url, docx 时上传,切片规则默认为 `\n`。
+ """
+ knowledge_id: str
+ """
+ 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。
+ """
+
+ sentence_size: int
+ """
+ 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。
+ """
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py
new file mode 100644
index 0000000000..a384b1a69a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py
@@ -0,0 +1,13 @@
+from typing import Literal
+
+from ...core import BaseModel
+
+__all__ = ["FileDeleted"]
+
+
+class FileDeleted(BaseModel):
+ id: str
+
+ deleted: bool
+
+ object: Literal["file"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py
similarity index 86%
rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py
rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py
index 75f76fe969..8f9d0fbb8e 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py
@@ -1,8 +1,8 @@
from typing import Optional
-from pydantic import BaseModel
+from ...core import BaseModel
-__all__ = ["FileObject"]
+__all__ = ["FileObject", "ListOfFileObject"]
class FileObject(BaseModel):
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py
new file mode 100644
index 0000000000..8f1ca5ce57
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py
@@ -0,0 +1,13 @@
+from typing import Optional
+
+from ...core import BaseModel
+
+
+class UploadDetail(BaseModel):
+ url: str
+ knowledge_type: int
+ file_name: Optional[str] = None
+ sentence_size: Optional[int] = None
+ custom_separator: Optional[list[str]] = None
+ callback_url: Optional[str] = None
+ callback_header: Optional[dict[str, str]] = None
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py
index 1d3930286b..75c7553dbe 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py
@@ -1,6 +1,6 @@
from typing import Optional, Union
-from pydantic import BaseModel
+from ...core import BaseModel
__all__ = ["FineTuningJob", "Error", "Hyperparameters", "ListOfFineTuningJob"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py
index e26b448534..f996cff114 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py
@@ -1,6 +1,6 @@
from typing import Optional, Union
-from pydantic import BaseModel
+from ...core import BaseModel
__all__ = ["FineTuningJobEvent", "Metric", "JobEvent"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py
new file mode 100644
index 0000000000..57d0d2511d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py
@@ -0,0 +1 @@
+from .fine_tuned_models import FineTunedModelsStatus
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py
new file mode 100644
index 0000000000..b286a5b577
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py
@@ -0,0 +1,13 @@
+from typing import ClassVar
+
+from ....core import PYDANTIC_V2, BaseModel, ConfigDict
+
+__all__ = ["FineTunedModelsStatus"]
+
+
+class FineTunedModelsStatus(BaseModel):
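+    """Deletion status of a fine-tuned model."""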
+ if PYDANTIC_V2:
+ model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow", protected_namespaces=())
+    request_id: str  # Request ID
+    model_name: str  # Model name
+    delete_status: str  # Deletion status: "deleting" (in progress) or "deleted"
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py
index b352ce0954..3bcad0acab 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py
@@ -2,7 +2,7 @@ from __future__ import annotations
from typing import Optional
-from pydantic import BaseModel
+from ..core import BaseModel
__all__ = ["GeneratedImage", "ImagesResponded"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py
new file mode 100644
index 0000000000..8c81d703e2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py
@@ -0,0 +1,8 @@
+from .knowledge import KnowledgeInfo
+from .knowledge_used import KnowledgeStatistics, KnowledgeUsed
+
+__all__ = [
+ "KnowledgeInfo",
+ "KnowledgeStatistics",
+ "KnowledgeUsed",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
new file mode 100644
index 0000000000..59cb41d712
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
@@ -0,0 +1,8 @@
+from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessInfo
+
+__all__ = [
+ "DocumentData",
+ "DocumentObject",
+ "DocumentSuccessInfo",
+ "DocumentFailedInfo",
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
new file mode 100644
index 0000000000..980bc6f4a7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
@@ -0,0 +1,51 @@
+from typing import Optional
+
+from ....core import BaseModel
+
+__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessInfo", "DocumentFailedInfo"]
+
+
+class DocumentSuccessInfo(BaseModel):
+    documentId: Optional[str] = None
+    """Document ID"""
+    filename: Optional[str] = None
+    """File name"""
+
+
+class DocumentFailedInfo(BaseModel):
+    failReason: Optional[str] = None
+    """Reason for the upload failure, e.g. unsupported file format, file size exceeded, knowledge base full (capacity limit: 500,000 words)."""
+    filename: Optional[str] = None
+    """File name"""
+    documentId: Optional[str] = None
+    """Document ID"""
+
+
+class DocumentObject(BaseModel):
+    """Document upload result"""
+
+    successInfos: Optional[list[DocumentSuccessInfo]] = None
+    """Files uploaded successfully"""
+    failedInfos: Optional[list[DocumentFailedInfo]] = None
+    """Files that failed to upload"""
+
+
+class DocumentDataFailInfo(BaseModel):
+    """Failure details"""
+
+    embedding_code: Optional[int] = (
+        None  # Failure code. 10001: knowledge unavailable, knowledge base capacity reached; 10002: knowledge unavailable, word count limit exceeded
+    )
+    embedding_msg: Optional[str] = None  # Failure reason
+
+
+class DocumentData(BaseModel):
+    id: Optional[str] = None  # Unique knowledge ID
+    custom_separator: Optional[list[str]] = None  # Chunking separators
+    sentence_size: Optional[str] = None  # Chunk size
+    length: Optional[int] = None  # File size in bytes
+    word_num: Optional[int] = None  # Word count of the file
+    name: Optional[str] = None  # File name
+    url: Optional[str] = None  # Download URL of the file
+    embedding_stat: Optional[int] = None  # 0: vectorizing, 1: vectorized, 2: vectorization failed
+    failInfo: Optional[DocumentDataFailInfo] = None  # Failure details, present when embedding_stat == 2
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py
new file mode 100644
index 0000000000..509cb3a451
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py
@@ -0,0 +1,29 @@
+from typing import Optional, TypedDict
+
+__all__ = ["DocumentEditParams"]
+
+
+class DocumentEditParams(TypedDict):
+ """
+ 知识参数类型定义
+
+ Attributes:
+ id (str): 知识ID
+ knowledge_type (int): 知识类型:
+ 1:文章知识: 支持pdf,url,docx
+ 2.问答知识-文档: 支持pdf,url,docx
+ 3.问答知识-表格: 支持xlsx
+ 4.商品库-表格: 支持xlsx
+ 5.自定义: 支持pdf,url,docx
+ custom_separator (Optional[List[str]]): 当前知识类型为自定义(knowledge_type=5)时的切片规则,默认\n
+ sentence_size (Optional[int]): 当前知识类型为自定义(knowledge_type=5)时的切片字数,取值范围: 20-2000,默认300
+ callback_url (Optional[str]): 回调地址
+ callback_header (Optional[dict]): 回调时携带的header
+ """
+
+ id: str
+ knowledge_type: int
+ custom_separator: Optional[list[str]]
+ sentence_size: Optional[int]
+ callback_url: Optional[str]
+ callback_header: Optional[dict[str, str]]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py
new file mode 100644
index 0000000000..910c8c045e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import TypedDict
+
+
+class DocumentListParams(TypedDict, total=False):
+ """
+ 文件查询参数类型定义
+
+ Attributes:
+ purpose (Optional[str]): 文件用途
+ knowledge_id (Optional[str]): 当文件用途为 retrieval 时,需要提供查询的知识库ID
+ page (Optional[int]): 页,默认1
+ limit (Optional[int]): 查询文件列表数,默认10
+ after (Optional[str]): 查询指定fileID之后的文件列表(当文件用途为 fine-tune 时需要)
+ order (Optional[str]): 排序规则,可选值['desc', 'asc'],默认desc(当文件用途为 fine-tune 时需要)
+ """
+
+ purpose: Optional[str]
+ knowledge_id: Optional[str]
+ page: Optional[int]
+ limit: Optional[int]
+ after: Optional[str]
+ order: Optional[str]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py
new file mode 100644
index 0000000000..acae4fad9f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from ....core import BaseModel
+from . import DocumentData
+
+__all__ = ["DocumentPage"]
+
+
+class DocumentPage(BaseModel):
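+    """A paginated list of knowledge documents."""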
+ list: list[DocumentData]
+ object: str
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py
new file mode 100644
index 0000000000..bc6f159eb2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py
@@ -0,0 +1,21 @@
+from typing import Optional
+
+from ...core import BaseModel
+
+__all__ = ["KnowledgeInfo"]
+
+
+class KnowledgeInfo(BaseModel):
+    id: Optional[str] = None
+    """Unique knowledge base ID"""
+    embedding_id: Optional[str] = (
+        None  # Embedding model bound to the knowledge base; see the model list in the [internal open API docs](https://lslfd0slxc.feishu.cn/docx/YauWdbBiMopV0FxB7KncPWCEn8f#H15NduiQZo3ugmxnWQFcfAHpnQ4)
+    )
+    name: Optional[str] = None  # Knowledge base name, at most 100 characters
+    customer_identifier: Optional[str] = None  # User identifier, at most 32 characters
+    description: Optional[str] = None  # Knowledge base description, at most 500 characters
+    background: Optional[str] = None  # Background color (enum): 'blue', 'red', 'orange', 'purple', 'sky'
+    icon: Optional[str] = (
+        None  # Knowledge base icon (enum): question, book, seal, wrench, tag, horn, house  # noqa: E501
+    )
+    bucket_id: Optional[str] = None  # Bucket ID, at most 32 characters
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py
new file mode 100644
index 0000000000..c3da201727
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Literal, Optional
+
+from typing_extensions import TypedDict
+
+__all__ = ["KnowledgeBaseParams"]
+
+
+class KnowledgeBaseParams(TypedDict):
+ """
+ 知识库参数类型定义
+
+ Attributes:
+ embedding_id (int): 知识库绑定的向量化模型ID
+ name (str): 知识库名称,限制100字
+ customer_identifier (Optional[str]): 用户标识,长度32位以内
+ description (Optional[str]): 知识库描述,限制500字
+ background (Optional[Literal['blue', 'red', 'orange', 'purple', 'sky']]): 背景颜色
+ icon (Optional[Literal['question', 'book', 'seal', 'wrench', 'tag', 'horn', 'house']]): 知识库图标
+ bucket_id (Optional[str]): 桶ID,限制32位
+ """
+
+ embedding_id: int
+ name: str
+ customer_identifier: Optional[str]
+ description: Optional[str]
+ background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None
+ icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None
+ bucket_id: Optional[str]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py
new file mode 100644
index 0000000000..a221b28e46
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["KnowledgeListParams"]
+
+
+class KnowledgeListParams(TypedDict, total=False):
+    page: int
+    """Page number, defaults to 1 (the first page).
+    """
+
+    size: int
+    """Number of items per page, defaults to 10.
+    """
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py
new file mode 100644
index 0000000000..e462eddc55
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from ...core import BaseModel
+from . import KnowledgeInfo
+
+__all__ = ["KnowledgePage"]
+
+
+class KnowledgePage(BaseModel):
+ list: list[KnowledgeInfo]
+ object: str
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py
new file mode 100644
index 0000000000..cfda709702
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py
@@ -0,0 +1,21 @@
+from typing import Optional
+
+from ...core import BaseModel
+
+__all__ = ["KnowledgeStatistics", "KnowledgeUsed"]
+
+
+class KnowledgeStatistics(BaseModel):
+ """
+ 使用量统计
+ """
+
+ word_num: Optional[int] = None
+ length: Optional[int] = None
+
+
+class KnowledgeUsed(BaseModel):
+ used: Optional[KnowledgeStatistics] = None
+ """已使用量"""
+ total: Optional[KnowledgeStatistics] = None
+ """知识库总量"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py
new file mode 100644
index 0000000000..c9bd60419c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py
@@ -0,0 +1,3 @@
+from .sensitive_word_check import SensitiveWordCheckRequest
+
+__all__ = ["SensitiveWordCheckRequest"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py
new file mode 100644
index 0000000000..0c37d99e65
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py
@@ -0,0 +1,14 @@
+from typing import Optional
+
+from typing_extensions import TypedDict
+
+
+class SensitiveWordCheckRequest(TypedDict, total=False):
+    type: Optional[str]
+    """Sensitive word type; currently only ALL is supported"""
+    status: Optional[str]
+    """Whether the sensitive word check is enabled
+    Enable: ENABLE
+    Disable: DISABLE
+    Note: the check is enabled by default; disabling it requires contacting business support for the corresponding permission, otherwise DISABLE has no effect.
+    """
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py
new file mode 100644
index 0000000000..62f77344ee
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py
@@ -0,0 +1,9 @@
+from .web_search import (
+ SearchIntent,
+ SearchRecommend,
+ SearchResult,
+ WebSearch,
+)
+from .web_search_chunk import WebSearchChunk
+
+__all__ = ["WebSearch", "SearchIntent", "SearchResult", "SearchRecommend", "WebSearchChunk"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py
new file mode 100644
index 0000000000..b3a3b26f07
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from typing import Optional, Union
+
+from typing_extensions import TypedDict
+
+__all__ = ["WebSearchParams"]
+
+
+class WebSearchParams(TypedDict):
+ """
+ 工具名:web-search-pro参数类型定义
+
+ Attributes:
+ :param model: str, 模型名称
+ :param request_id: Optional[str], 请求ID
+ :param stream: Optional[bool], 是否流式
+ :param messages: Union[str, List[str], List[int], object, None],
+ 包含历史对话上下文的内容,按照 {"role": "user", "content": "你好"} 的json 数组形式进行传参
+ 当前版本仅支持 User Message 单轮对话,工具会理解User Message并进行搜索,
+ 请尽可能传入不带指令格式的用户原始提问,以提高搜索准确率。
+ :param scope: Optional[str], 指定搜索范围,全网、学术等,默认全网
+ :param location: Optional[str], 指定搜索用户地区 location 提高相关性
+ :param recent_days: Optional[int],支持指定返回 N 天(1-30)更新的搜索结果
+
+
+ """
+
+ model: str
+ request_id: Optional[str]
+ stream: Optional[bool]
+ messages: Union[str, list[str], list[int], object, None]
+ scope: Optional[str] = None
+ location: Optional[str] = None
+ recent_days: Optional[int] = None
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py
new file mode 100644
index 0000000000..ac9fa3821e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py
@@ -0,0 +1,71 @@
+from typing import Optional
+
+from ...core import BaseModel
+
+__all__ = [
+ "WebSearch",
+ "SearchIntent",
+ "SearchResult",
+ "SearchRecommend",
+]
+
+
+class SearchIntent(BaseModel):
+    index: int
+    # Search round, defaults to 0
+    query: str
+    # Optimized search query
+    intent: str
+    # Detected intent type
+    keywords: str
+    # Search keywords
+
+
+class SearchResult(BaseModel):
+    index: int
+    # Search round, defaults to 0
+    title: str
+    # Title
+    link: str
+    # Link
+    content: str
+    # Content
+    icon: str
+    # Icon
+    media: str
+    # Source media
+    refer: str
+    # Reference marker, e.g. [ref_1]
+
+
+class SearchRecommend(BaseModel):
+    index: int
+    # Search round, defaults to 0
+    query: str
+    # Recommended query
+
+
+class WebSearchMessageToolCall(BaseModel):
+ id: str
+ search_intent: Optional[SearchIntent]
+ search_result: Optional[SearchResult]
+ search_recommend: Optional[SearchRecommend]
+ type: str
+
+
+class WebSearchMessage(BaseModel):
+ role: str
+ tool_calls: Optional[list[WebSearchMessageToolCall]] = None
+
+
+class WebSearchChoice(BaseModel):
+ index: int
+ finish_reason: str
+ message: WebSearchMessage
+
+
+class WebSearch(BaseModel):
+ created: Optional[int] = None
+ choices: list[WebSearchChoice]
+ request_id: Optional[str] = None
+ id: Optional[str] = None
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py
new file mode 100644
index 0000000000..7fb0e02bb5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py
@@ -0,0 +1,33 @@
+from typing import Optional
+
+from ...core import BaseModel
+from .web_search import SearchIntent, SearchRecommend, SearchResult
+
+__all__ = ["WebSearchChunk"]
+
+
+class ChoiceDeltaToolCall(BaseModel):
+ index: int
+ id: Optional[str] = None
+
+ search_intent: Optional[SearchIntent] = None
+ search_result: Optional[SearchResult] = None
+ search_recommend: Optional[SearchRecommend] = None
+ type: Optional[str] = None
+
+
+class ChoiceDelta(BaseModel):
+ role: Optional[str] = None
+ tool_calls: Optional[list[ChoiceDeltaToolCall]] = None
+
+
+class Choice(BaseModel):
+ delta: ChoiceDelta
+ finish_reason: Optional[str] = None
+ index: int
+
+
+class WebSearchChunk(BaseModel):
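+    """A streamed chunk of a web-search response; each choice carries an incremental `delta` of tool-call results."""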
+ id: Optional[str] = None
+ choices: list[Choice]
+ created: Optional[int] = None
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py
new file mode 100644
index 0000000000..b14072b1a7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py
@@ -0,0 +1,3 @@
+from .video_object import VideoObject, VideoResult
+
+__all__ = ["VideoObject", "VideoResult"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py
new file mode 100644
index 0000000000..f5489d708e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import TypedDict
+
+__all__ = ["VideoCreateParams"]
+
+from ..sensitive_word_check import SensitiveWordCheckRequest
+
+
+class VideoCreateParams(TypedDict, total=False):
+    model: str
+    """Model code"""
+    prompt: str
+    """Text description of the desired video"""
+    image_url: str
+    """Image used for image-to-video generation; supports a URL or a Base64-encoded image.
+    * Image format:
+    * Image size:"""
+    sensitive_word_check: Optional[SensitiveWordCheckRequest]
+    """Sensitive word check configuration"""
+    request_id: str
+    """Unique identifier supplied by the client to distinguish each request; generated by the platform if omitted."""
+
+    user_id: str
+    """Client-side user ID."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py
new file mode 100644
index 0000000000..85c3844d8a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py
@@ -0,0 +1,30 @@
+from typing import Optional
+
+from ...core import BaseModel
+
+__all__ = ["VideoObject", "VideoResult"]
+
+
+class VideoResult(BaseModel):
+    url: str
+    """Video URL"""
+    cover_image_url: str
+    """Cover (preview) image URL"""
+
+
+class VideoObject(BaseModel):
+    id: Optional[str] = None
+    """Task ID generated by the Zhipu AI open platform; use it when querying the task result"""
+
+    model: str
+    """Model name"""
+
+    video_result: list[VideoResult]
+    """Video generation results"""
+
+    task_status: str
+    """Processing status: PROCESSING, SUCCESS or FAIL
+    Note: while PROCESSING, the result must be fetched via the query endpoint"""
+
+    request_id: str
+    """Task ID submitted by the client or generated by the platform"""
diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py
index f27a0af6e0..db6ce9d9c3 100644
--- a/api/core/ops/entities/trace_entity.py
+++ b/api/core/ops/entities/trace_entity.py
@@ -21,8 +21,7 @@ class BaseTraceInfo(BaseModel):
return None
if isinstance(v, str | dict | list):
return v
- else:
- return ""
+ return ""
class WorkflowTraceInfo(BaseTraceInfo):
diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py
index 6f17bade97..0200f4a32d 100644
--- a/api/core/ops/ops_trace_manager.py
+++ b/api/core/ops/ops_trace_manager.py
@@ -176,11 +176,18 @@ class OpsTraceManager:
return None
app: App = db.session.query(App).filter(App.id == app_id).first()
+
+ if app is None:
+ return None
+
app_ops_trace_config = json.loads(app.tracing) if app.tracing else None
- if app_ops_trace_config is not None:
- tracing_provider = app_ops_trace_config.get("tracing_provider")
- else:
+ if app_ops_trace_config is None:
+ return None
+
+ tracing_provider = app_ops_trace_config.get("tracing_provider")
+
+ if tracing_provider is None or tracing_provider not in provider_config_map:
return None
# decrypt_token
@@ -701,7 +708,7 @@ class TraceQueueManager:
trace_task.app_id = self.app_id
trace_manager_queue.put(trace_task)
except Exception as e:
- logging.debug(f"Error adding trace task: {e}")
+ logging.error(f"Error adding trace task: {e}")
finally:
self.start_timer()
@@ -720,7 +727,7 @@ class TraceQueueManager:
if tasks:
self.send_to_celery(tasks)
except Exception as e:
- logging.debug(f"Error processing trace tasks: {e}")
+ logging.error(f"Error processing trace tasks: {e}")
def start_timer(self):
global trace_manager_timer
diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py
index 498685b342..3cd3fb5756 100644
--- a/api/core/ops/utils.py
+++ b/api/core/ops/utils.py
@@ -6,12 +6,15 @@ from models.model import Message
def filter_none_values(data: dict):
+ new_data = {}
for key, value in data.items():
if value is None:
continue
if isinstance(value, datetime):
- data[key] = value.isoformat()
- return {key: value for key, value in data.items() if value is not None}
+ new_data[key] = value.isoformat()
+ else:
+ new_data[key] = value
+ return new_data
def get_message_data(message_id):
diff --git a/api/core/prompt/utils/extract_thread_messages.py b/api/core/prompt/utils/extract_thread_messages.py
new file mode 100644
index 0000000000..e8b626499f
--- /dev/null
+++ b/api/core/prompt/utils/extract_thread_messages.py
@@ -0,0 +1,22 @@
+from constants import UUID_NIL
+
+
+def extract_thread_messages(messages: list[dict]) -> list[dict]:
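+    """
+    Walk the parent-message chain and collect the messages that belong to a single thread.
+
+    Assumes `messages` is ordered from newest to oldest and that each item exposes
+    `id` and `parent_message_id` attributes; a message without a parent marks the start of the thread.
+    """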
+ thread_messages = []
+ next_message = None
+
+ for message in messages:
+ if not message.parent_message_id:
+ # If the message is regenerated and does not have a parent message, it is the start of a new thread
+ thread_messages.append(message)
+ break
+
+ if not next_message:
+ thread_messages.append(message)
+ next_message = message.parent_message_id
+ else:
+ if next_message in {message.id, UUID_NIL}:
+ thread_messages.append(message)
+ next_message = message.parent_message_id
+
+ return thread_messages
diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py
index 286ecd4c03..4603957d68 100644
--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@@ -110,7 +110,7 @@ class DatasetRetrieval:
continue
# pass if dataset is not available
- if dataset and dataset.available_document_count == 0 and dataset.available_document_count == 0:
+ if dataset and dataset.available_document_count == 0:
continue
available_datasets.append(dataset)
@@ -468,7 +468,7 @@ class DatasetRetrieval:
continue
# pass if dataset is not available
- if dataset and dataset.available_document_count == 0 and dataset.available_document_count == 0:
+ if dataset and dataset.available_document_count == 0:
continue
available_datasets.append(dataset)
diff --git a/api/core/tools/README.md b/api/core/tools/README.md
index c7ee81422e..b5d0a30d34 100644
--- a/api/core/tools/README.md
+++ b/api/core/tools/README.md
@@ -9,10 +9,10 @@ The tools provided for Agents and Workflows are currently divided into two categ
- `Api-Based Tools` leverage third-party APIs for implementation. You don't need to code to integrate these -- simply provide interface definitions in formats like `OpenAPI` , `Swagger`, or the `OpenAI-plugin` on the front-end.
### Built-in Tool Providers
-
+
### API Tool Providers
-
+
## Tool Integration
diff --git a/api/core/tools/README_CN.md b/api/core/tools/README_CN.md
index fda5d0630c..7e18441131 100644
--- a/api/core/tools/README_CN.md
+++ b/api/core/tools/README_CN.md
@@ -12,10 +12,10 @@
- `Api-Based Tools` 基于API的工具,即通过调用第三方API实现的工具,`Api-Based Tool`不需要再额外定义,只需提供`OpenAPI` `Swagger` `OpenAI plugin`等接口文档即可。
### 内置工具供应商
-
+
### API工具供应商
-
+
## 工具接入
为了实现更灵活更强大的功能,Tools提供了一系列的接口,帮助开发者快速构建想要的工具,本文作为开发者的入门指南,将会以[快速接入](./docs/zh_Hans/tool_scale_out.md)和[高级接入](./docs/zh_Hans/advanced_scale_out.md)两部分介绍如何接入工具。
diff --git a/api/core/tools/README_JP.md b/api/core/tools/README_JP.md
new file mode 100644
index 0000000000..39d0bf1762
--- /dev/null
+++ b/api/core/tools/README_JP.md
@@ -0,0 +1,31 @@
+# Tools
+
+このモジュールは、Difyのエージェントアシスタントやワークフローで使用される組み込みツールを実装しています。このモジュールでは、フロントエンドのロジックを変更することなく、独自のツールを定義し表示することができます。この分離により、Difyの機能を容易に水平方向にスケールアウトできます。
+
+## 機能紹介
+
+エージェントとワークフロー向けに提供されるツールは、現在2つのカテゴリーに分類されています。
+
+- `Built-in Tools`はDify内部で実装され、エージェントとワークフローで使用するためにハードコードされています。
+- `Api-Based Tools`はサードパーティのAPIを利用して実装されています。これらを統合するためのコーディングは不要で、フロントエンドで
+ `OpenAPI`, `Swagger`または`OpenAI-plugin`などの形式でインターフェース定義を提供するだけです。
+
+### 組み込みツールプロバイダー
+
+
+
+### APIツールプロバイダー
+
+
+
+## ツールの統合
+
+開発者が柔軟で強力なツールを構築できるよう、2つのガイドを提供しています。
+
+### [クイック統合 👈🏻](./docs/ja_JP/tool_scale_out.md)
+
+クイック統合は、Google検索ツールの例を通じて、ツール統合の基本をすばやく理解できるようにすることを目的としています。
+
+### [高度な統合 👈🏻](./docs/ja_JP/advanced_scale_out.md)
+
+高度な統合では、モジュールインターフェースについてより深く掘り下げ、画像生成、複数ツールの組み合わせ、異なるツール間でのパラメーター、画像、ファイルのフロー管理など、より複雑な機能の実装方法を説明します。
\ No newline at end of file
diff --git a/api/core/tools/docs/en_US/tool_scale_out.md b/api/core/tools/docs/en_US/tool_scale_out.md
index 121b7a5a76..1deaf04a47 100644
--- a/api/core/tools/docs/en_US/tool_scale_out.md
+++ b/api/core/tools/docs/en_US/tool_scale_out.md
@@ -245,4 +245,4 @@ After the above steps are completed, we can see this tool on the frontend, and i
Of course, because google_search needs a credential, before using it, you also need to input your credentials on the frontend.
-
+
diff --git a/api/core/tools/docs/zh_Hans/images/index/image-1.png b/api/core/tools/docs/images/index/image-1.png
similarity index 100%
rename from api/core/tools/docs/zh_Hans/images/index/image-1.png
rename to api/core/tools/docs/images/index/image-1.png
diff --git a/api/core/tools/docs/zh_Hans/images/index/image-2.png b/api/core/tools/docs/images/index/image-2.png
similarity index 100%
rename from api/core/tools/docs/zh_Hans/images/index/image-2.png
rename to api/core/tools/docs/images/index/image-2.png
diff --git a/api/core/tools/docs/zh_Hans/images/index/image.png b/api/core/tools/docs/images/index/image.png
similarity index 100%
rename from api/core/tools/docs/zh_Hans/images/index/image.png
rename to api/core/tools/docs/images/index/image.png
diff --git a/api/core/tools/docs/ja_JP/advanced_scale_out.md b/api/core/tools/docs/ja_JP/advanced_scale_out.md
new file mode 100644
index 0000000000..96f843354f
--- /dev/null
+++ b/api/core/tools/docs/ja_JP/advanced_scale_out.md
@@ -0,0 +1,283 @@
+# 高度なツール統合
+
+このガイドを始める前に、Difyのツール統合プロセスの基本を理解していることを確認してください。簡単な概要については[クイック統合](./tool_scale_out.md)をご覧ください。
+
+## ツールインターフェース
+
+より複雑なツールを迅速に構築するのを支援するため、`Tool`クラスに一連のヘルパーメソッドを定義しています。
+
+### メッセージの返却
+
+Difyは`テキスト`、`リンク`、`画像`、`ファイルBLOB`、`JSON`などの様々なメッセージタイプをサポートしています。以下のインターフェースを通じて、異なるタイプのメッセージをLLMとユーザーに返すことができます。
+
+注意:以下のインターフェースの一部のパラメータについては、後のセクションで説明します。
+
+#### 画像URL
+画像のURLを渡すだけで、Difyが自動的に画像をダウンロードしてユーザーに返します。
+
+```python
+ def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage:
+ """
+ create an image message
+
+ :param image: the url of the image
+ :param save_as: save as
+ :return: the image message
+ """
+```
+
+#### リンク
+リンクを返す必要がある場合は、以下のインターフェースを使用できます。
+
+```python
+ def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage:
+ """
+ create a link message
+
+ :param link: the url of the link
+ :param save_as: save as
+ :return: the link message
+ """
+```
+
+#### テキスト
+テキストメッセージを返す必要がある場合は、以下のインターフェースを使用できます。
+
+```python
+ def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage:
+ """
+ create a text message
+
+ :param text: the text of the message
+ :param save_as: save as
+ :return: the text message
+ """
+```
+
+#### ファイルBLOB
+画像、音声、動画、PPT、Word、Excelなどのファイルの生データを返す必要がある場合は、以下のインターフェースを使用できます。
+
+- `blob` ファイルの生データ(bytes型)
+- `meta` ファイルのメタデータ。ファイルの種類が分かっている場合は、`mime_type`を渡すことをお勧めします。そうでない場合、Difyはデフォルトタイプとして`octet/stream`を使用します。
+
+```python
+ def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
+ """
+ create a blob message
+
+ :param blob: the blob
+ :param meta: meta
+ :param save_as: save as
+ :return: the blob message
+ """
+```
+
+#### JSON
+フォーマットされたJSONを返す必要がある場合は、以下のインターフェースを使用できます。これは通常、ワークフロー内のノード間のデータ伝送に使用されますが、エージェントモードでは、ほとんどの大規模言語モデルもJSONを読み取り、理解することができます。
+
+- `object` Pythonの辞書オブジェクトで、自動的にJSONにシリアライズされます。
+
+```python
+ def create_json_message(self, object: dict) -> ToolInvokeMessage:
+ """
+ create a json message
+ """
+```
+
+### ショートカットツール
+
+大規模モデルアプリケーションでは、以下の2つの一般的なニーズがあります:
+- まず長いテキストを事前に要約し、その要約内容をLLMに渡すことで、元のテキストが長すぎてLLMが処理できない問題を防ぐ
+- ツールが取得したコンテンツがリンクである場合、Webページ情報をクロールしてからLLMに返す必要がある
+
+開発者がこれら2つのニーズを迅速に実装できるよう、以下の2つのショートカットツールを提供しています。
+
+#### テキスト要約ツール
+
+このツールはuser_idと要約するテキストを入力として受け取り、要約されたテキストを返します。Difyは現在のワークスペースのデフォルトモデルを使用して長文を要約します。
+
+```python
+ def summary(self, user_id: str, content: str) -> str:
+ """
+ summary the content
+
+ :param user_id: the user id
+ :param content: the content
+ :return: the summary
+ """
+```
+
+#### Webページクローリングツール
+
+このツールはクロールするWebページのリンクとユーザーエージェント(空でも可)を入力として受け取り、そのWebページの情報を含む文字列を返します。`user_agent`はオプションのパラメータで、ツールを識別するために使用できます。渡さない場合、Difyはデフォルトの`user_agent`を使用します。
+
+```python
+ def get_url(self, url: str, user_agent: str = None) -> str:
+ """
+ get url from the crawled result
+ """
+```
+
+### 変数プール
+
+`Tool`内に変数プールを導入し、ツールの実行中に生成された変数やファイルなどを保存します。これらの変数は、ツールの実行中に他のツールが使用することができます。
+
+次に、`DallE3`と`Vectorizer.AI`を例に、変数プールの使用方法を紹介します。
+
+- `DallE3`は画像生成ツールで、テキストに基づいて画像を生成できます。ここでは、`DallE3`にカフェのロゴを生成させます。
+- `Vectorizer.AI`はベクター画像変換ツールで、画像をベクター画像に変換できるため、画像を無限に拡大しても品質が損なわれません。ここでは、`DallE3`が生成したPNGアイコンをベクター画像に変換し、デザイナーが実際に使用できるようにします。
+
+#### DallE3
+まず、DallE3を使用します。画像を作成した後、その画像を変数プールに保存します。コードは以下の通りです:
+
+```python
+from typing import Any, Dict, List, Union
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+from base64 import b64decode
+
+from openai import OpenAI
+
+class DallE3Tool(BuiltinTool):
+ def _invoke(self,
+ user_id: str,
+ tool_parameters: Dict[str, Any],
+ ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
+ """
+ invoke tools
+ """
+ client = OpenAI(
+ api_key=self.runtime.credentials['openai_api_key'],
+ )
+
+ # prompt
+ prompt = tool_parameters.get('prompt', '')
+ if not prompt:
+ return self.create_text_message('Please input prompt')
+
+ # call openapi dalle3
+ response = client.images.generate(
+ prompt=prompt, model='dall-e-3',
+ size='1024x1024', n=1, style='vivid', quality='standard',
+ response_format='b64_json'
+ )
+
+ result = []
+ for image in response.data:
+ # Save all images to the variable pool through the save_as parameter. The variable name is self.VARIABLE_KEY.IMAGE.value. If new images are generated later, they will overwrite the previous images.
+ result.append(self.create_blob_message(blob=b64decode(image.b64_json),
+ meta={ 'mime_type': 'image/png' },
+ save_as=self.VARIABLE_KEY.IMAGE.value))
+
+ return result
+```
+
+ここでは画像の変数名として`self.VARIABLE_KEY.IMAGE.value`を使用していることに注意してください。開発者のツールが互いに連携できるよう、この`KEY`を定義しました。自由に使用することも、この`KEY`を使用しないこともできます。カスタムのKEYを渡すこともできます。
+
+#### Vectorizer.AI
+次に、Vectorizer.AIを使用して、DallE3が生成したPNGアイコンをベクター画像に変換します。ここで定義した関数を見てみましょう。コードは以下の通りです:
+
+```python
+from core.tools.tool.builtin_tool import BuiltinTool
+from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter
+from core.tools.errors import ToolProviderCredentialValidationError
+
+from typing import Any, Dict, List, Union
+from httpx import post
+from base64 import b64decode
+
+class VectorizerTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) \
+            -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
+ """
+ Tool invocation, the image variable name needs to be passed in from here, so that we can get the image from the variable pool
+ """
+
+
+ def get_runtime_parameters(self) -> List[ToolParameter]:
+ """
+ Override the tool parameter list, we can dynamically generate the parameter list based on the actual situation in the current variable pool, so that the LLM can generate the form based on the parameter list
+ """
+
+
+ def is_tool_available(self) -> bool:
+ """
+ Whether the current tool is available, if there is no image in the current variable pool, then we don't need to display this tool, just return False here
+ """
+```
+
+次に、これら3つの関数を実装します:
+
+```python
+from core.tools.tool.builtin_tool import BuiltinTool
+from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter
+from core.tools.errors import ToolProviderCredentialValidationError
+
+from typing import Any, Dict, List, Union
+from httpx import post
+from base64 import b64decode
+
+class VectorizerTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) \
+            -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
+ """
+ invoke tools
+ """
+ api_key_name = self.runtime.credentials.get('api_key_name', None)
+ api_key_value = self.runtime.credentials.get('api_key_value', None)
+
+ if not api_key_name or not api_key_value:
+ raise ToolProviderCredentialValidationError('Please input api key name and value')
+
+ # Get image_id, the definition of image_id can be found in get_runtime_parameters
+ image_id = tool_parameters.get('image_id', '')
+ if not image_id:
+ return self.create_text_message('Please input image id')
+
+ # Get the image generated by DallE from the variable pool
+ image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE)
+ if not image_binary:
+ return self.create_text_message('Image not found, please request user to generate image firstly.')
+
+ # Generate vector image
+ response = post(
+ 'https://vectorizer.ai/api/v1/vectorize',
+ files={ 'image': image_binary },
+ data={ 'mode': 'test' },
+ auth=(api_key_name, api_key_value),
+ timeout=30
+ )
+
+ if response.status_code != 200:
+ raise Exception(response.text)
+
+ return [
+ self.create_text_message('the vectorized svg is saved as an image.'),
+ self.create_blob_message(blob=response.content,
+ meta={'mime_type': 'image/svg+xml'})
+ ]
+
+ def get_runtime_parameters(self) -> List[ToolParameter]:
+ """
+ override the runtime parameters
+ """
+ # Here, we override the tool parameter list, define the image_id, and set its option list to all images in the current variable pool. The configuration here is consistent with the configuration in yaml.
+ return [
+ ToolParameter.get_simple_instance(
+ name='image_id',
+ llm_description=f'the image id that you want to vectorize, \
+ and the image id should be specified in \
+ {[i.name for i in self.list_default_image_variables()]}',
+ type=ToolParameter.ToolParameterType.SELECT,
+ required=True,
+ options=[i.name for i in self.list_default_image_variables()]
+ )
+ ]
+
+ def is_tool_available(self) -> bool:
+ # Only when there are images in the variable pool, the LLM needs to use this tool
+ return len(self.list_default_image_variables()) > 0
+```
+
+ここで注目すべきは、実際には`image_id`を使用していないことです。このツールを呼び出す際には、デフォルトの変数プールに必ず画像があると仮定し、直接`image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE)`を使用して画像を取得しています。モデルの能力が弱い場合、開発者にもこの方法を推奨します。これにより、エラー許容度を効果的に向上させ、モデルが誤ったパラメータを渡すのを防ぐことができます。
\ No newline at end of file
diff --git a/api/core/tools/docs/ja_JP/tool_scale_out.md b/api/core/tools/docs/ja_JP/tool_scale_out.md
new file mode 100644
index 0000000000..a721023d00
--- /dev/null
+++ b/api/core/tools/docs/ja_JP/tool_scale_out.md
@@ -0,0 +1,240 @@
+# ツールの迅速な統合
+
+ここでは、GoogleSearchを例にツールを迅速に統合する方法を紹介します。
+
+## 1. ツールプロバイダーのyamlを準備する
+
+### 概要
+
+このyamlファイルには、プロバイダー名、アイコン、作者などの詳細情報が含まれ、フロントエンドでの柔軟な表示を可能にします。
+
+### 例
+
+`core/tools/provider/builtin`の下に`google`モジュール(フォルダ)を作成し、`google.yaml`を作成します。名前はモジュール名と一致している必要があります。
+
+以降、このツールに関するすべての操作はこのモジュール内で行います。
+
+```yaml
+identity: # ツールプロバイダーの基本情報
+ author: Dify # 作者
+ name: google # 名前(一意、他のプロバイダーと重複不可)
+ label: # フロントエンド表示用のラベル
+ en_US: Google # 英語ラベル
+ zh_Hans: Google # 中国語ラベル
+ description: # フロントエンド表示用の説明
+ en_US: Google # 英語説明
+ zh_Hans: Google # 中国語説明
+ icon: icon.svg # アイコン(現在のモジュールの_assetsフォルダに配置)
+ tags: # タグ(フロントエンド表示用)
+ - search
+```
+
+- `identity`フィールドは必須で、ツールプロバイダーの基本情報(作者、名前、ラベル、説明、アイコンなど)が含まれます。
+ - アイコンは現在のモジュールの`_assets`フォルダに配置する必要があります。[こちら](../../provider/builtin/google/_assets/icon.svg)を参照してください。
+ - タグはフロントエンドでの表示に使用され、ユーザーがこのツールプロバイダーを素早く見つけるのに役立ちます。現在サポートされているすべてのタグは以下の通りです:
+ ```python
+ class ToolLabelEnum(Enum):
+ SEARCH = 'search'
+ IMAGE = 'image'
+ VIDEOS = 'videos'
+ WEATHER = 'weather'
+ FINANCE = 'finance'
+ DESIGN = 'design'
+ TRAVEL = 'travel'
+ SOCIAL = 'social'
+ NEWS = 'news'
+ MEDICAL = 'medical'
+ PRODUCTIVITY = 'productivity'
+ EDUCATION = 'education'
+ BUSINESS = 'business'
+ ENTERTAINMENT = 'entertainment'
+ UTILITIES = 'utilities'
+ OTHER = 'other'
+ ```
+
+## 2. プロバイダーの認証情報を準備する
+
+GoogleはSerpApiが提供するAPIを使用するサードパーティツールであり、SerpApiを使用するにはAPI Keyが必要です。つまり、このツールを使用するには認証情報が必要です。一方、`wikipedia`のようなツールでは認証情報フィールドを記入する必要はありません。[こちら](../../provider/builtin/wikipedia/wikipedia.yaml)を参照してください。
+
+認証情報フィールドを設定すると、以下のようになります:
+
+```yaml
+identity:
+ author: Dify
+ name: google
+ label:
+ en_US: Google
+ zh_Hans: Google
+ description:
+ en_US: Google
+ zh_Hans: Google
+ icon: icon.svg
+credentials_for_provider: # 認証情報フィールド
+ serpapi_api_key: # 認証情報フィールド名
+ type: secret-input # 認証情報フィールドタイプ
+ required: true # 必須かどうか
+ label: # 認証情報フィールドラベル
+ en_US: SerpApi API key # 英語ラベル
+ zh_Hans: SerpApi API key # 中国語ラベル
+ placeholder: # 認証情報フィールドプレースホルダー
+ en_US: Please input your SerpApi API key # 英語プレースホルダー
+ zh_Hans: 请输入你的 SerpApi API key # 中国語プレースホルダー
+ help: # 認証情報フィールドヘルプテキスト
+ en_US: Get your SerpApi API key from SerpApi # 英語ヘルプテキスト
+ zh_Hans: 从 SerpApi 获取您的 SerpApi API key # 中国語ヘルプテキスト
+ url: https://serpapi.com/manage-api-key # 認証情報フィールドヘルプリンク
+```
+
+- `type`:認証情報フィールドタイプ。現在、`secret-input`、`text-input`、`select`の3種類をサポートしており、それぞれパスワード入力ボックス、テキスト入力ボックス、ドロップダウンボックスに対応します。`secret-input`の場合、フロントエンドで入力内容が隠され、バックエンドで入力内容が暗号化されます。
+
+## 3. ツールのyamlを準備する
+
+1つのプロバイダーの下に複数のツールを持つことができ、各ツールにはyamlファイルが必要です。このファイルにはツールの基本情報、パラメータ、出力などが含まれます。
+
+引き続きGoogleSearchを例に、`google`モジュールの下に`tools`モジュールを作成し、`tools/google_search.yaml`を作成します。内容は以下の通りです:
+
+```yaml
+identity: # ツールの基本情報
+ name: google_search # ツール名(一意、他のツールと重複不可)
+ author: Dify # 作者
+ label: # フロントエンド表示用のラベル
+ en_US: GoogleSearch # 英語ラベル
+ zh_Hans: 谷歌搜索 # 中国語ラベル
+description: # フロントエンド表示用の説明
+ human: # フロントエンド表示用の紹介(多言語対応)
+ en_US: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query.
+ zh_Hans: 一个用于执行 Google SERP 搜索并提取片段和网页的工具。输入应该是一个搜索查询。
+ llm: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query. # LLMに渡す紹介文。LLMがこのツールをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。
+parameters: # パラメータリスト
+ - name: query # パラメータ名
+ type: string # パラメータタイプ
+ required: true # 必須かどうか
+ label: # パラメータラベル
+ en_US: Query string # 英語ラベル
+ zh_Hans: 查询语句 # 中国語ラベル
+ human_description: # フロントエンド表示用の紹介(多言語対応)
+ en_US: used for searching
+ zh_Hans: 用于搜索网页内容
+ llm_description: key words for searching # LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。
+ form: llm # フォームタイプ。llmはこのパラメータがAgentによって推論される必要があることを意味し、フロントエンドではこのパラメータは表示されません。
+ - name: result_type
+ type: select # パラメータタイプ
+ required: true
+ options: # ドロップダウンボックスのオプション
+ - value: text
+ label:
+ en_US: text
+ zh_Hans: 文本
+ - value: link
+ label:
+ en_US: link
+ zh_Hans: 链接
+ default: link
+ label:
+ en_US: Result type
+ zh_Hans: 结果类型
+ human_description:
+ en_US: used for selecting the result type, text or link
+ zh_Hans: 用于选择结果类型,使用文本还是链接进行展示
+ form: form # フォームタイプ。formはこのパラメータが対話開始前にフロントエンドでユーザーによって入力される必要があることを意味します。
+```
+
+- `identity`フィールドは必須で、ツールの基本情報(名前、作者、ラベル、説明など)が含まれます。
+- `parameters` パラメータリスト
+ - `name`(必須)パラメータ名。一意で、他のパラメータと重複しないようにしてください。
+ - `type`(必須)パラメータタイプ。現在、`string`、`number`、`boolean`、`select`、`secret-input`の5種類をサポートしており、それぞれ文字列、数値、ブール値、ドロップダウンボックス、暗号化入力ボックスに対応します。機密情報には`secret-input`タイプの使用をお勧めします。
+ - `label`(必須)パラメータラベル。フロントエンド表示用です。
+ - `form`(必須)フォームタイプ。現在、`llm`と`form`の2種類をサポートしています。
+ - エージェントアプリケーションでは、`llm`はこのパラメータがLLM自身によって推論されることを示し、`form`はこのツールを使用するために事前に設定できるパラメータであることを示します。
+ - ワークフローアプリケーションでは、`llm`と`form`の両方がフロントエンドで入力する必要がありますが、`llm`のパラメータはツールノードの入力変数として使用されます。
+ - `required` パラメータが必須かどうかを示します。
+ - `llm`モードでは、パラメータが必須の場合、Agentはこのパラメータを推論する必要があります。
+ - `form`モードでは、パラメータが必須の場合、ユーザーは対話開始前にフロントエンドでこのパラメータを入力する必要があります。
+ - `options` パラメータオプション
+ - `llm`モードでは、DifyはすべてのオプションをLLMに渡し、LLMはこれらのオプションに基づいて推論できます。
+ - `form`モードで、`type`が`select`の場合、フロントエンドはこれらのオプションを表示します。
+ - `default` デフォルト値
+ - `min` 最小値。パラメータタイプが`number`の場合に設定できます。
+ - `max` 最大値。パラメータタイプが`number`の場合に設定できます。
+ - `human_description` フロントエンド表示用の紹介。多言語対応です。
+ - `placeholder` 入力ボックスのプロンプトテキスト。フォームタイプが`form`で、パラメータタイプが`string`、`number`、`secret-input`の場合に設定できます。多言語対応です。
+ - `llm_description` LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。
+
+## 4. ツールコードを準備する
+
+ツールの設定が完了したら、ツールのロジックを実装するコードを作成します。
+
+`google/tools`モジュールの下に`google_search.py`を作成し、内容は以下の通りです:
+
+```python
+from core.tools.tool.builtin_tool import BuiltinTool
+from core.tools.entities.tool_entities import ToolInvokeMessage
+
+from typing import Any, Dict, List, Union
+
+class GoogleSearchTool(BuiltinTool):
+ def _invoke(self,
+ user_id: str,
+ tool_parameters: Dict[str, Any],
+ ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
+ """
+ ツールを呼び出す
+ """
+ query = tool_parameters['query']
+ result_type = tool_parameters['result_type']
+ api_key = self.runtime.credentials['serpapi_api_key']
+ result = SerpAPI(api_key).run(query, result_type=result_type)
+
+ if result_type == 'text':
+ return self.create_text_message(text=result)
+ return self.create_link_message(link=result)
+```
+
+### パラメータ
+ツールの全体的なロジックは`_invoke`メソッドにあります。このメソッドは2つのパラメータ(`user_id`と`tool_parameters`)を受け取り、それぞれユーザーIDとツールパラメータを表します。
+
+### 戻り値
+ツールの戻り値として、1つのメッセージまたは複数のメッセージを選択できます。ここでは1つのメッセージを返しています。`create_text_message`と`create_link_message`を使用して、テキストメッセージまたはリンクメッセージを作成できます。複数のメッセージを返す場合は、リストを構築できます(例:`[self.create_text_message('msg1'), self.create_text_message('msg2')]`)。
+
+## 5. プロバイダーコードを準備する
+
+最後に、プロバイダーモジュールの下にプロバイダークラスを作成し、プロバイダーの認証情報検証ロジックを実装する必要があります。認証情報の検証が失敗した場合、`ToolProviderCredentialValidationError`例外が発生します。
+
+`google`モジュールの下に`google.py`を作成し、内容は以下の通りです:
+
+```python
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+from core.tools.errors import ToolProviderCredentialValidationError
+
+from core.tools.provider.builtin.google.tools.google_search import GoogleSearchTool
+
+from typing import Any, Dict
+
+class GoogleProvider(BuiltinToolProviderController):
+ def _validate_credentials(self, credentials: Dict[str, Any]) -> None:
+ try:
+ # 1. ここでGoogleSearchTool()を使ってGoogleSearchToolをインスタンス化する必要があります。これによりGoogleSearchToolのyaml設定が自動的に読み込まれますが、この時点では認証情報は含まれていません
+ # 2. 次に、fork_tool_runtimeメソッドを使用して、現在の認証情報をGoogleSearchToolに渡す必要があります
+ # 3. 最後に、invokeを呼び出します。パラメータはGoogleSearchToolのyamlで設定されたパラメータルールに従って渡す必要があります
+ GoogleSearchTool().fork_tool_runtime(
+ meta={
+ "credentials": credentials,
+ }
+ ).invoke(
+ user_id='',
+ tool_parameters={
+ "query": "test",
+ "result_type": "link"
+ },
+ )
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(str(e))
+```
+
+## 完了
+
+以上のステップが完了すると、このツールをフロントエンドで確認し、Agentで使用することができるようになります。
+
+もちろん、google_searchには認証情報が必要なため、使用する前にフロントエンドで認証情報を入力する必要があります。
+
+
\ No newline at end of file
diff --git a/api/core/tools/docs/zh_Hans/tool_scale_out.md b/api/core/tools/docs/zh_Hans/tool_scale_out.md
index 06a8d9a4f9..ec61e4677b 100644
--- a/api/core/tools/docs/zh_Hans/tool_scale_out.md
+++ b/api/core/tools/docs/zh_Hans/tool_scale_out.md
@@ -234,4 +234,4 @@ class GoogleProvider(BuiltinToolProviderController):
当然,因为google_search需要一个凭据,在使用之前,还需要在前端配置它的凭据。
-
+
diff --git a/api/core/tools/entities/common_entities.py b/api/core/tools/entities/common_entities.py
index 37a926697b..924e6fc0cf 100644
--- a/api/core/tools/entities/common_entities.py
+++ b/api/core/tools/entities/common_entities.py
@@ -1,6 +1,6 @@
from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
class I18nObject(BaseModel):
@@ -8,16 +8,16 @@ class I18nObject(BaseModel):
Model class for i18n object.
"""
- zh_Hans: Optional[str] = None
- pt_BR: Optional[str] = None
en_US: str
+ zh_Hans: Optional[str] = Field(default=None)
+ pt_BR: Optional[str] = Field(default=None)
+ ja_JP: Optional[str] = Field(default=None)
def __init__(self, **data):
super().__init__(**data)
- if not self.zh_Hans:
- self.zh_Hans = self.en_US
- if not self.pt_BR:
- self.pt_BR = self.en_US
+ self.zh_Hans = self.zh_Hans or self.en_US
+ self.pt_BR = self.pt_BR or self.en_US
+ self.ja_JP = self.ja_JP or self.en_US
def to_dict(self) -> dict:
- return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR}
+ return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR, "ja_JP": self.ja_JP}
diff --git a/api/core/tools/provider/builtin/arxiv/arxiv.yaml b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
index d26993b336..25aec97bb7 100644
--- a/api/core/tools/provider/builtin/arxiv/arxiv.yaml
+++ b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
@@ -4,9 +4,11 @@ identity:
label:
en_US: ArXiv
zh_Hans: ArXiv
+ ja_JP: ArXiv
description:
en_US: Access to a vast repository of scientific papers and articles in various fields of research.
zh_Hans: 访问各个研究领域大量科学论文和文章的存储库。
+ ja_JP: 多様な研究分野の科学論文や記事の膨大なリポジトリへのアクセス。
icon: icon.svg
tags:
- search
diff --git a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
index 7439a48658..afc1925df3 100644
--- a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
+++ b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
@@ -4,10 +4,12 @@ identity:
label:
en_US: Arxiv Search
zh_Hans: Arxiv 搜索
+ ja_JP: Arxiv 検索
description:
human:
en_US: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
zh_Hans: 一个用于从Arxiv存储库搜索科学论文和文章的工具。 输入可以是Arxiv ID或作者姓名。
+ ja_JP: Arxivリポジトリから科学論文や記事を検索するためのツールです。入力はArxiv IDまたは著者名にすることができます。
llm: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
parameters:
- name: query
@@ -16,8 +18,10 @@ parameters:
label:
en_US: Query string
zh_Hans: 查询字符串
+ ja_JP: クエリ文字列
human_description:
en_US: The Arxiv ID or author's name used for searching.
zh_Hans: 用于搜索的Arxiv ID或作者姓名。
+ ja_JP: 検索に使用されるArxiv IDまたは著者名。
llm_description: The Arxiv ID or author's name used for searching.
form: llm
diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.py b/api/core/tools/provider/builtin/cogview/tools/cogview3.py
index 9039708588..085084ca38 100644
--- a/api/core/tools/provider/builtin/cogview/tools/cogview3.py
+++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.py
@@ -21,15 +21,22 @@ class CogView3Tool(BuiltinTool):
)
size_mapping = {
"square": "1024x1024",
- "vertical": "1024x1792",
- "horizontal": "1792x1024",
+ "vertical_768": "768x1344",
+ "vertical_864": "864x1152",
+ "horizontal_1344": "1344x768",
+ "horizontal_1152": "1152x864",
+ "widescreen_1440": "1440x720",
+ "tallscreen_720": "720x1440",
}
# prompt
prompt = tool_parameters.get("prompt", "")
if not prompt:
return self.create_text_message("Please input prompt")
- # get size
- size = size_mapping[tool_parameters.get("size", "square")]
+ # get size key
+ size_key = tool_parameters.get("size", "square")
+ # cogview-3-plus get size
+ if size_key != "cogview_3":
+ size = size_mapping[size_key]
# get n
n = tool_parameters.get("n", 1)
# get quality
@@ -43,16 +50,29 @@ class CogView3Tool(BuiltinTool):
# set extra body
seed_id = tool_parameters.get("seed_id", self._generate_random_id(8))
extra_body = {"seed": seed_id}
- response = client.images.generations(
- prompt=prompt,
- model="cogview-3",
- size=size,
- n=n,
- extra_body=extra_body,
- style=style,
- quality=quality,
- response_format="b64_json",
- )
+ # cogview-3-plus
+ if size_key != "cogview_3":
+ response = client.images.generations(
+ prompt=prompt,
+ model="cogview-3-plus",
+ size=size,
+ n=n,
+ extra_body=extra_body,
+ style=style,
+ quality=quality,
+ response_format="b64_json",
+ )
+ # cogview-3
+ else:
+ response = client.images.generations(
+ prompt=prompt,
+ model="cogview-3",
+ n=n,
+ extra_body=extra_body,
+ style=style,
+ quality=quality,
+ response_format="b64_json",
+ )
result = []
for image in response.data:
result.append(self.create_image_message(image=image.url))
diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml
index 1de3f599b6..9ab5c2729b 100644
--- a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml
+++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml
@@ -42,21 +42,46 @@ parameters:
pt_BR: Image size
form: form
options:
+ - value: cogview_3
+ label:
+ en_US: Square_cogview_3(1024x1024)
+ zh_Hans: 方_cogview_3(1024x1024)
+ pt_BR: Square_cogview_3(1024x1024)
- value: square
label:
- en_US: Squre(1024x1024)
+ en_US: Square(1024x1024)
zh_Hans: 方(1024x1024)
- pt_BR: Squre(1024x1024)
- - value: vertical
+ pt_BR: Square(1024x1024)
+ - value: vertical_768
label:
- en_US: Vertical(1024x1792)
- zh_Hans: 竖屏(1024x1792)
- pt_BR: Vertical(1024x1792)
- - value: horizontal
+ en_US: Vertical(768x1344)
+ zh_Hans: 竖屏(768x1344)
+ pt_BR: Vertical(768x1344)
+ - value: vertical_864
label:
- en_US: Horizontal(1792x1024)
- zh_Hans: 横屏(1792x1024)
- pt_BR: Horizontal(1792x1024)
+ en_US: Vertical(864x1152)
+ zh_Hans: 竖屏(864x1152)
+ pt_BR: Vertical(864x1152)
+ - value: horizontal_1344
+ label:
+ en_US: Horizontal(1344x768)
+ zh_Hans: 横屏(1344x768)
+ pt_BR: Horizontal(1344x768)
+ - value: horizontal_1152
+ label:
+ en_US: Horizontal(1152x864)
+ zh_Hans: 横屏(1152x864)
+ pt_BR: Horizontal(1152x864)
+ - value: widescreen_1440
+ label:
+ en_US: Widescreen(1440x720)
+ zh_Hans: 宽屏(1440x720)
+ pt_BR: Widescreen(1440x720)
+ - value: tallscreen_720
+ label:
+ en_US: Tallscreen(720x1440)
+ zh_Hans: 高屏(720x1440)
+ pt_BR: Tallscreen(720x1440)
default: square
- name: n
type: number
diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py
index 81fc8cc985..eaa4b0d027 100644
--- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py
+++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py
@@ -333,7 +333,7 @@ class ComfyuiStableDiffusionTool(BuiltinTool):
break
return self.create_blob_message(
- blob=image, meta={"mime_type": "image/png"}, save_as=self.VARIABLE_KEY.IMAGE.value
+ blob=image, meta={"mime_type": "image/png"}, save_as=self.VariableKey.IMAGE.value
)
except Exception as e:
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
index 9675b8eb91..15ab510c6c 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
@@ -35,10 +35,10 @@ class CrawlTool(BuiltinTool):
scrapeOptions["excludeTags"] = get_array_params(tool_parameters, "excludeTags")
scrapeOptions["onlyMainContent"] = tool_parameters.get("onlyMainContent", False)
scrapeOptions["waitFor"] = tool_parameters.get("waitFor", 0)
- scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in {None, ""}}
+ scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in (None, "")}
payload["scrapeOptions"] = scrapeOptions or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.crawl_url(url=tool_parameters["url"], wait=wait_for_results, **payload)
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
index 538b4a1fcb..f00a9b31ce 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
@@ -29,10 +29,10 @@ class ScrapeTool(BuiltinTool):
extract["schema"] = get_json_params(tool_parameters, "schema")
extract["systemPrompt"] = tool_parameters.get("systemPrompt")
extract["prompt"] = tool_parameters.get("prompt")
- extract = {k: v for k, v in extract.items() if v not in {None, ""}}
+ extract = {k: v for k, v in extract.items() if v not in (None, "")}
payload["extract"] = extract or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.scrape_url(url=tool_parameters["url"], **payload)
markdown_result = crawl_result.get("data", {}).get("markdown", "")
diff --git a/api/core/tools/provider/builtin/jina/jina.yaml b/api/core/tools/provider/builtin/jina/jina.yaml
index 06f23382d9..9ce5cbd6d1 100644
--- a/api/core/tools/provider/builtin/jina/jina.yaml
+++ b/api/core/tools/provider/builtin/jina/jina.yaml
@@ -1,10 +1,10 @@
identity:
author: Dify
- name: jina
+ name: Jina AI
label:
- en_US: Jina
- zh_Hans: Jina
- pt_BR: Jina
+ en_US: Jina AI
+ zh_Hans: Jina AI
+ pt_BR: Jina AI
description:
en_US: Convert any URL to an LLM-friendly input or perform searches on the web for grounding information. Experience improved output for your agent and RAG systems at no cost.
zh_Hans: 将任何URL转换为LLM易读的输入或在网页上搜索引擎上搜索引擎。
@@ -22,11 +22,11 @@ credentials_for_provider:
zh_Hans: API 密钥(可留空)
pt_BR: Chave API (deixe vazio se você não tiver uma)
placeholder:
- en_US: Please enter your Jina API key
- zh_Hans: 请输入你的 Jina API 密钥
- pt_BR: Por favor, insira sua chave de API do Jina
+ en_US: Please enter your Jina AI API key
+ zh_Hans: 请输入你的 Jina AI API 密钥
+ pt_BR: Por favor, insira sua chave de API do Jina AI
help:
- en_US: Get your Jina API key from Jina (optional, but you can get a higher rate)
- zh_Hans: 从 Jina 获取您的 Jina API 密钥(非必须,能得到更高的速率)
- pt_BR: Obtenha sua chave de API do Jina na Jina (opcional, mas você pode obter uma taxa mais alta)
+ en_US: Get your Jina AI API key from Jina AI (optional, but you can get a higher rate)
+ zh_Hans: 从 Jina AI 获取您的 Jina AI API 密钥(非必须,能得到更高的速率)
+ pt_BR: Obtenha sua chave de API do Jina AI na Jina AI (opcional, mas você pode obter uma taxa mais alta)
url: https://jina.ai
diff --git a/api/core/tools/provider/builtin/xinference/_assets/icon.png b/api/core/tools/provider/builtin/xinference/_assets/icon.png
new file mode 100644
index 0000000000..e58cacbd12
Binary files /dev/null and b/api/core/tools/provider/builtin/xinference/_assets/icon.png differ
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
new file mode 100644
index 0000000000..847f2730f2
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
@@ -0,0 +1,412 @@
+import io
+import json
+from base64 import b64decode, b64encode
+from copy import deepcopy
+from typing import Any, Union
+
+from httpx import get, post
+from PIL import Image
+from yarl import URL
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_entities import (
+ ToolInvokeMessage,
+ ToolParameter,
+ ToolParameterOption,
+)
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.tool.builtin_tool import BuiltinTool
+
+# All commented out parameters default to null
+DRAW_TEXT_OPTIONS = {
+ # Prompts
+ "prompt": "",
+ "negative_prompt": "",
+ # "styles": [],
+ # Seeds
+ "seed": -1,
+ "subseed": -1,
+ "subseed_strength": 0,
+ "seed_resize_from_h": -1,
+ "seed_resize_from_w": -1,
+ # Samplers
+ "sampler_name": "DPM++ 2M",
+ # "scheduler": "",
+ # "sampler_index": "Automatic",
+ # Latent Space Options
+ "batch_size": 1,
+ "n_iter": 1,
+ "steps": 10,
+ "cfg_scale": 7,
+ "width": 512,
+ "height": 512,
+ # "restore_faces": True,
+ # "tiling": True,
+ "do_not_save_samples": False,
+ "do_not_save_grid": False,
+ # "eta": 0,
+ # "denoising_strength": 0.75,
+ # "s_min_uncond": 0,
+ # "s_churn": 0,
+ # "s_tmax": 0,
+ # "s_tmin": 0,
+ # "s_noise": 0,
+ "override_settings": {},
+ "override_settings_restore_afterwards": True,
+ # Refinement Options
+ "refiner_checkpoint": "",
+ "refiner_switch_at": 0,
+ "disable_extra_networks": False,
+ # "firstpass_image": "",
+ # "comments": "",
+ # High-Resolution Options
+ "enable_hr": False,
+ "firstphase_width": 0,
+ "firstphase_height": 0,
+ "hr_scale": 2,
+ # "hr_upscaler": "",
+ "hr_second_pass_steps": 0,
+ "hr_resize_x": 0,
+ "hr_resize_y": 0,
+ # "hr_checkpoint_name": "",
+ # "hr_sampler_name": "",
+ # "hr_scheduler": "",
+ "hr_prompt": "",
+ "hr_negative_prompt": "",
+ # Task Options
+ # "force_task_id": "",
+ # Script Options
+ # "script_name": "",
+ "script_args": [],
+ # Output Options
+ "send_images": True,
+ "save_images": False,
+ "alwayson_scripts": {},
+ # "infotext": "",
+}
+
+
+class StableDiffusionTool(BuiltinTool):
+ def _invoke(
+ self, user_id: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ invoke tools
+ """
+ # base url
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return self.create_text_message("Please input base_url")
+
+ if tool_parameters.get("model"):
+ self.runtime.credentials["model"] = tool_parameters["model"]
+
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ return self.create_text_message("Please input model")
+
+ # set model
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "options")
+ response = post(
+ url,
+ json={"sd_model_checkpoint": model},
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+                raise ToolProviderCredentialValidationError("Failed to set the model, please ask the user to set the model")
+        except Exception as e:
+            raise ToolProviderCredentialValidationError("Failed to set the model, please ask the user to set the model")
+
+ # get image id and image variable
+ image_id = tool_parameters.get("image_id", "")
+ image_variable = self.get_default_image_variable()
+ # Return text2img if there's no image ID or no image variable
+ if not image_id or not image_variable:
+ return self.text2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ # Proceed with image-to-image generation
+ return self.img2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ def validate_models(self):
+ """
+ validate models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ raise ToolProviderCredentialValidationError("Please input base_url")
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ raise ToolProviderCredentialValidationError("Please input model")
+
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=10)
+ if response.status_code == 404:
+ # try draw a picture
+ self._invoke(
+ user_id="test",
+ tool_parameters={
+ "prompt": "a cat",
+ "width": 1024,
+ "height": 1024,
+ "steps": 1,
+ "lora": "",
+ },
+ )
+ elif response.status_code != 200:
+ raise ToolProviderCredentialValidationError("Failed to get models")
+ else:
+ models = [d["model_name"] for d in response.json()]
+ if len([d for d in models if d == model]) > 0:
+ return self.create_text_message(json.dumps(models))
+ else:
+ raise ToolProviderCredentialValidationError(f"model {model} does not exist")
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(f"Failed to get models, {e}")
+
+ def get_sd_models(self) -> list[str]:
+ """
+ get sd models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["model_name"] for d in response.json()]
+ except Exception as e:
+ return []
+
+ def get_sample_methods(self) -> list[str]:
+ """
+ get sample method
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "samplers")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["name"] for d in response.json()]
+ except Exception as e:
+ return []
+
+ def img2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+
+ # Fetch the binary data of the image
+ image_variable = self.get_default_image_variable()
+ image_binary = self.get_variable_file(image_variable.name)
+ if not image_binary:
+            return self.create_text_message("Image not found, please ask the user to generate an image first.")
+
+ # Convert image to RGB and save as PNG
+ try:
+ with Image.open(io.BytesIO(image_binary)) as image, io.BytesIO() as buffer:
+ image.convert("RGB").save(buffer, format="PNG")
+ image_binary = buffer.getvalue()
+ except Exception as e:
+ return self.create_text_message(f"Failed to process the image: {str(e)}")
+
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ # set image options
+ model = tool_parameters.get("model", "")
+ draw_options_image = {
+ "init_images": [b64encode(image_binary).decode("utf-8")],
+ "denoising_strength": 0.9,
+ "restore_faces": False,
+ "script_args": [],
+ "override_settings": {"sd_model_checkpoint": model},
+ "resize_mode": 0,
+ "image_cfg_scale": 0,
+ # "mask": None,
+ "mask_blur_x": 4,
+ "mask_blur_y": 4,
+ "mask_blur": 0,
+ "mask_round": True,
+ "inpainting_fill": 0,
+ "inpaint_full_res": True,
+ "inpaint_full_res_padding": 0,
+ "inpainting_mask_invert": 0,
+ "initial_noise_multiplier": 0,
+ # "latent_mask": None,
+ "include_init_images": True,
+ }
+ # update key and values
+ draw_options.update(draw_options_image)
+ draw_options.update(tool_parameters)
+
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "img2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def text2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ draw_options.update(tool_parameters)
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+ draw_options["override_settings"]["sd_model_checkpoint"] = model
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "txt2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def get_runtime_parameters(self) -> list[ToolParameter]:
+ parameters = [
+ ToolParameter(
+ name="prompt",
+ label=I18nObject(en_US="Prompt", zh_Hans="Prompt"),
+ human_description=I18nObject(
+ en_US="Image prompt, you can check the official documentation of Stable Diffusion",
+ zh_Hans="图像提示词,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image prompt of Stable Diffusion, you should describe the image you want to generate"
+                " as a detailed list of words; the prompt must be written in English.",
+ required=True,
+ ),
+ ]
+ if len(self.list_default_image_variables()) != 0:
+ parameters.append(
+ ToolParameter(
+ name="image_id",
+ label=I18nObject(en_US="image_id", zh_Hans="image_id"),
+ human_description=I18nObject(
+                        en_US="Image ID of the image to base the generation on; if you want to generate"
+                        " from the default image, you can leave this field empty.",
+ zh_Hans="您想要生成的图像的图像 ID,如果您想要基于默认图像生成图像,则可以将此字段留空。",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image id of the original image, you can leave this field empty if you want to"
+ " generate a new image.",
+ required=True,
+ options=[
+ ToolParameterOption(value=i.name, label=I18nObject(en_US=i.name, zh_Hans=i.name))
+ for i in self.list_default_image_variables()
+ ],
+ )
+ )
+
+ if self.runtime.credentials:
+ try:
+ models = self.get_sd_models()
+ if len(models) != 0:
+ parameters.append(
+ ToolParameter(
+ name="model",
+ label=I18nObject(en_US="Model", zh_Hans="Model"),
+ human_description=I18nObject(
+ en_US="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的模型,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=models[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in models
+ ],
+ )
+ )
+
+ except:
+ pass
+
+ sample_methods = self.get_sample_methods()
+ if len(sample_methods) != 0:
+ parameters.append(
+ ToolParameter(
+ name="sampler_name",
+ label=I18nObject(en_US="Sampling method", zh_Hans="Sampling method"),
+ human_description=I18nObject(
+ en_US="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的Sampling method,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=sample_methods[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in sample_methods
+ ],
+ )
+ )
+ return parameters
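
For orientation, here is a standalone sketch of the HTTP flow the new tool drives against an Xinference server that exposes the Stable Diffusion WebUI-compatible API; the base URL, API key and model name below are placeholders, not values from this patch:

```python
# Sketch of the calls made by StableDiffusionTool above; the endpoint paths mirror
# the /sdapi/v1/* routes used in _invoke and text2img. Credentials are placeholders.
from base64 import b64decode

from httpx import post
from yarl import URL

base_url = "http://localhost:9997"  # hypothetical Xinference server
headers = {"Authorization": "Bearer YOUR_API_KEY"}

# select the checkpoint, as _invoke does via /sdapi/v1/options
post(str(URL(base_url) / "sdapi" / "v1" / "options"),
     json={"sd_model_checkpoint": "my-sd-model"}, headers=headers)

# text-to-image, as text2img does via /sdapi/v1/txt2img (small subset of DRAW_TEXT_OPTIONS)
resp = post(str(URL(base_url) / "sdapi" / "v1" / "txt2img"),
            json={"prompt": "a cat", "width": 512, "height": 512, "steps": 10},
            headers=headers, timeout=120)
png_bytes = b64decode(resp.json()["images"][0])
```
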
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
new file mode 100644
index 0000000000..4f1d17f175
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
@@ -0,0 +1,87 @@
+identity:
+ name: stable_diffusion
+ author: xinference
+ label:
+ en_US: Stable Diffusion
+ zh_Hans: Stable Diffusion
+description:
+ human:
+ en_US: Generate images using Stable Diffusion models.
+ zh_Hans: 使用 Stable Diffusion 模型生成图片。
+ llm: draw the image you want based on your prompt.
+parameters:
+ - name: prompt
+ type: string
+ required: true
+ label:
+ en_US: Prompt
+ zh_Hans: 提示词
+ human_description:
+ en_US: Image prompt
+ zh_Hans: 图像提示词
+    llm_description: Image prompt of Stable Diffusion, you should describe the image you want to generate as a detailed list of words; the prompt must be written in English.
+ form: llm
+ - name: model
+ type: string
+ required: false
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ human_description:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ form: form
+ - name: lora
+ type: string
+ required: false
+ label:
+ en_US: Lora
+ zh_Hans: Lora
+ human_description:
+ en_US: Lora
+ zh_Hans: Lora
+ form: form
+ - name: steps
+ type: number
+ required: false
+ label:
+ en_US: Steps
+ zh_Hans: Steps
+ human_description:
+ en_US: Steps
+ zh_Hans: Steps
+ form: form
+ default: 10
+ - name: width
+ type: number
+ required: false
+ label:
+ en_US: Width
+ zh_Hans: Width
+ human_description:
+ en_US: Width
+ zh_Hans: Width
+ form: form
+ default: 1024
+ - name: height
+ type: number
+ required: false
+ label:
+ en_US: Height
+ zh_Hans: Height
+ human_description:
+ en_US: Height
+ zh_Hans: Height
+ form: form
+ default: 1024
+ - name: negative_prompt
+ type: string
+ required: false
+ label:
+ en_US: Negative prompt
+ zh_Hans: Negative prompt
+ human_description:
+ en_US: Negative prompt
+ zh_Hans: Negative prompt
+ form: form
+ default: bad art, ugly, deformed, watermark, duplicated, discontinuous lines
diff --git a/api/core/tools/provider/builtin/xinference/xinference.py b/api/core/tools/provider/builtin/xinference/xinference.py
new file mode 100644
index 0000000000..7c2428cc00
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.py
@@ -0,0 +1,18 @@
+import requests
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class XinferenceProvider(BuiltinToolProviderController):
+ def _validate_credentials(self, credentials: dict) -> None:
+ base_url = credentials.get("base_url")
+ api_key = credentials.get("api_key")
+ model = credentials.get("model")
+ res = requests.post(
+ f"{base_url}/sdapi/v1/options",
+ headers={"Authorization": f"Bearer {api_key}"},
+ json={"sd_model_checkpoint": model},
+ )
+ if res.status_code != 200:
+ raise ToolProviderCredentialValidationError("Xinference API key is invalid")
diff --git a/api/core/tools/provider/builtin/xinference/xinference.yaml b/api/core/tools/provider/builtin/xinference/xinference.yaml
new file mode 100644
index 0000000000..19aaf5cbd1
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.yaml
@@ -0,0 +1,40 @@
+identity:
+ author: xinference
+ name: xinference
+ label:
+ en_US: Xinference
+ zh_Hans: Xinference
+ description:
+ zh_Hans: Xinference 提供的兼容 Stable Diffusion web ui 的图片生成 API。
+ en_US: Stable Diffusion web ui compatible API provided by Xinference.
+ icon: icon.png
+ tags:
+ - image
+credentials_for_provider:
+ base_url:
+ type: secret-input
+ required: true
+ label:
+ en_US: Base URL
+ zh_Hans: Xinference 服务器的 Base URL
+ placeholder:
+ en_US: Please input Xinference server's Base URL
+ zh_Hans: 请输入 Xinference 服务器的 Base URL
+ model:
+ type: text-input
+ required: true
+ label:
+ en_US: Model
+ zh_Hans: 模型
+ placeholder:
+ en_US: Please input your model name
+ zh_Hans: 请输入你的模型名称
+ api_key:
+ type: secret-input
+ required: true
+ label:
+ en_US: API Key
+ zh_Hans: Xinference 服务器的 API Key
+ placeholder:
+ en_US: Please input Xinference server's API Key
+ zh_Hans: 请输入 Xinference 服务器的 API Key
diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py
index 57e4f716fd..8342dbd13d 100644
--- a/api/core/workflow/graph_engine/graph_engine.py
+++ b/api/core/workflow/graph_engine/graph_engine.py
@@ -180,16 +180,20 @@ class GraphEngine:
# trigger graph run success event
yield GraphRunSucceededEvent(outputs=self.graph_runtime_state.outputs)
+ self._release_thread()
except GraphRunFailedError as e:
yield GraphRunFailedEvent(error=e.error)
+ self._release_thread()
return
except Exception as e:
logger.exception("Unknown Error when graph running")
yield GraphRunFailedEvent(error=str(e))
+ self._release_thread()
raise e
- finally:
- if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping:
- del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id]
+
+ def _release_thread(self):
+ if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping:
+ del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id]
def _run(
self,
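
The cleanup that previously lived in the `finally` block now runs through an explicit `_release_thread()` call on every exit path of the generator. A minimal, standalone sketch of the registry pattern involved (class and attribute names are illustrative, not Dify's API):

```python
# Illustrative sketch, not Dify code: a class-level registry of worker pools keyed
# by id, released explicitly on each exit path of a generator-based run() method.
from concurrent.futures import ThreadPoolExecutor


class Engine:
    pool_mapping: dict[str, ThreadPoolExecutor] = {}

    def __init__(self, pool_id: str, is_pool_owner: bool):
        self.pool_id = pool_id
        self.is_pool_owner = is_pool_owner  # only the creator of the pool removes the entry

    def run(self):
        try:
            yield "run succeeded"
            self._release_pool()
        except Exception:
            yield "run failed"
            self._release_pool()
            raise

    def _release_pool(self):
        if self.is_pool_owner and self.pool_id in Engine.pool_mapping:
            del Engine.pool_mapping[self.pool_id]
```
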
diff --git a/api/core/workflow/nodes/end/end_stream_processor.py b/api/core/workflow/nodes/end/end_stream_processor.py
index 0366d7965d..1aecf863ac 100644
--- a/api/core/workflow/nodes/end/end_stream_processor.py
+++ b/api/core/workflow/nodes/end/end_stream_processor.py
@@ -22,8 +22,8 @@ class EndStreamProcessor(StreamProcessor):
for end_node_id, _ in self.end_stream_param.end_stream_variable_selector_mapping.items():
self.route_position[end_node_id] = 0
self.current_stream_chunk_generating_node_ids: dict[str, list[str]] = {}
- self.has_outputed = False
- self.outputed_node_ids = set()
+ self.has_output = False
+ self.output_node_ids = set()
def process(self, generator: Generator[GraphEngineEvent, None, None]) -> Generator[GraphEngineEvent, None, None]:
for event in generator:
@@ -34,11 +34,11 @@ class EndStreamProcessor(StreamProcessor):
yield event
elif isinstance(event, NodeRunStreamChunkEvent):
if event.in_iteration_id:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
continue
@@ -53,11 +53,11 @@ class EndStreamProcessor(StreamProcessor):
)
if stream_out_end_node_ids:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
elif isinstance(event, NodeRunSucceededEvent):
yield event
@@ -124,11 +124,11 @@ class EndStreamProcessor(StreamProcessor):
if text:
current_node_id = value_selector[0]
- if self.has_outputed and current_node_id not in self.outputed_node_ids:
+ if self.has_output and current_node_id not in self.output_node_ids:
text = "\n" + text
- self.outputed_node_ids.add(current_node_id)
- self.has_outputed = True
+ self.output_node_ids.add(current_node_id)
+ self.has_output = True
yield NodeRunStreamChunkEvent(
id=event.id,
node_id=event.node_id,
diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py
index 6f20745daf..01bb4e9076 100644
--- a/api/core/workflow/nodes/iteration/iteration_node.py
+++ b/api/core/workflow/nodes/iteration/iteration_node.py
@@ -89,6 +89,7 @@ class IterationNode(BaseNode):
variable_pool=variable_pool,
max_execution_steps=dify_config.WORKFLOW_MAX_EXECUTION_STEPS,
max_execution_time=dify_config.WORKFLOW_MAX_EXECUTION_TIME,
+ thread_pool_id=self.thread_pool_id,
)
start_at = datetime.now(timezone.utc).replace(tzinfo=None)
diff --git a/api/core/workflow/nodes/question_classifier/template_prompts.py b/api/core/workflow/nodes/question_classifier/template_prompts.py
index ce32b01aa4..4bca2d9dd4 100644
--- a/api/core/workflow/nodes/question_classifier/template_prompts.py
+++ b/api/core/workflow/nodes/question_classifier/template_prompts.py
@@ -2,9 +2,9 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
### Job Description',
You are a text classification engine that analyzes text data and assigns categories based on user input or automatically determined categories.
### Task
- Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.Additionally, you need to extract the key words from the text that are related to the classification.
+    Your task is to assign ONLY ONE category to the input text, and only that category may be returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
### Format
- The input text is in the variable input_text.Categories are specified as a category list with two filed category_id and category_name in the variable categories .Classification instructions may be included to improve the classification accuracy.
+    The input text is in the variable input_text. Categories are specified as a category list with two fields, category_id and category_name, in the variable categories. Classification instructions may be included to improve the classification accuracy.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Memory
@@ -52,7 +52,7 @@ QUESTION_CLASSIFIER_COMPLETION_PROMPT = """
### Job Description
You are a text classification engine that analyzes text data and assigns categories based on user input or automatically determined categories.
### Task
-Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
+Your task is to assign ONLY ONE category to the input text, and only that category may be returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
### Format
The input text is in the variable input_text. Categories are specified as a category list with two filed category_id and category_name in the variable categories. Classification instructions may be included to improve the classification accuracy.
### Constraint
diff --git a/api/extensions/ext_sentry.py b/api/extensions/ext_sentry.py
index c2dc736038..e255e7eb35 100644
--- a/api/extensions/ext_sentry.py
+++ b/api/extensions/ext_sentry.py
@@ -5,6 +5,8 @@ from sentry_sdk.integrations.celery import CeleryIntegration
from sentry_sdk.integrations.flask import FlaskIntegration
from werkzeug.exceptions import HTTPException
+from core.model_runtime.errors.invoke import InvokeRateLimitError
+
def before_send(event, hint):
if "exc_info" in hint:
@@ -20,7 +22,13 @@ def init_app(app):
sentry_sdk.init(
dsn=app.config.get("SENTRY_DSN"),
integrations=[FlaskIntegration(), CeleryIntegration()],
- ignore_errors=[HTTPException, ValueError, openai.APIStatusError, parse_error.defaultErrorResponse],
+ ignore_errors=[
+ HTTPException,
+ ValueError,
+ openai.APIStatusError,
+ InvokeRateLimitError,
+ parse_error.defaultErrorResponse,
+ ],
traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 1.0),
profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 1.0),
environment=app.config.get("DEPLOY_ENV"),
diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py
index 5ce18b7292..1e6530f6f4 100644
--- a/api/extensions/ext_storage.py
+++ b/api/extensions/ext_storage.py
@@ -1,3 +1,4 @@
+import logging
from collections.abc import Generator
from typing import Union
@@ -40,28 +41,56 @@ class Storage:
self.storage_runner = LocalStorage(app=app)
def save(self, filename, data):
- self.storage_runner.save(filename, data)
+ try:
+ self.storage_runner.save(filename, data)
+ except Exception as e:
+ logging.exception("Failed to save file: %s", e)
+ raise e
def load(self, filename: str, stream: bool = False) -> Union[bytes, Generator]:
- if stream:
- return self.load_stream(filename)
- else:
- return self.load_once(filename)
+ try:
+ if stream:
+ return self.load_stream(filename)
+ else:
+ return self.load_once(filename)
+ except Exception as e:
+ logging.exception("Failed to load file: %s", e)
+ raise e
def load_once(self, filename: str) -> bytes:
- return self.storage_runner.load_once(filename)
+ try:
+ return self.storage_runner.load_once(filename)
+ except Exception as e:
+ logging.exception("Failed to load_once file: %s", e)
+ raise e
def load_stream(self, filename: str) -> Generator:
- return self.storage_runner.load_stream(filename)
+ try:
+ return self.storage_runner.load_stream(filename)
+ except Exception as e:
+ logging.exception("Failed to load_stream file: %s", e)
+ raise e
def download(self, filename, target_filepath):
- self.storage_runner.download(filename, target_filepath)
+ try:
+ self.storage_runner.download(filename, target_filepath)
+ except Exception as e:
+ logging.exception("Failed to download file: %s", e)
+ raise e
def exists(self, filename):
- return self.storage_runner.exists(filename)
+ try:
+ return self.storage_runner.exists(filename)
+ except Exception as e:
+ logging.exception("Failed to check file exists: %s", e)
+ raise e
def delete(self, filename):
- return self.storage_runner.delete(filename)
+ try:
+ return self.storage_runner.delete(filename)
+ except Exception as e:
+ logging.exception("Failed to delete file: %s", e)
+ raise e
storage = Storage()
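
Each storage method above now wraps the runner call in the same try/log/re-raise block. As a design note, the same behaviour could be factored into a decorator; a standalone sketch, not part of this patch:

```python
# Illustrative only: a decorator equivalent of the repeated try/except blocks above.
import functools
import logging


def log_failures(action: str):
    def decorator(func):
        @functools.wraps(func)
        def inner(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                logging.exception("Failed to %s file: %s", action, e)
                raise
        return inner
    return decorator


@log_failures("load_once")
def load_once(filename: str) -> bytes:
    with open(filename, "rb") as f:
        return f.read()
```
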
diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py
index 9207314fc2..3dcd88d1de 100644
--- a/api/fields/conversation_fields.py
+++ b/api/fields/conversation_fields.py
@@ -75,6 +75,7 @@ message_detail_fields = {
"metadata": fields.Raw(attribute="message_metadata_dict"),
"status": fields.String,
"error": fields.String,
+ "parent_message_id": fields.String,
}
feedback_stat_fields = {"like": fields.Integer, "dislike": fields.Integer}
diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py
index 3d2df87afb..c938097131 100644
--- a/api/fields/message_fields.py
+++ b/api/fields/message_fields.py
@@ -62,6 +62,7 @@ retriever_resource_fields = {
message_fields = {
"id": fields.String,
"conversation_id": fields.String,
+ "parent_message_id": fields.String,
"inputs": fields.Raw,
"query": fields.String,
"answer": fields.String(attribute="re_sign_file_url_answer"),
diff --git a/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py
new file mode 100644
index 0000000000..fd957eeafb
--- /dev/null
+++ b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py
@@ -0,0 +1,36 @@
+"""add parent_message_id to messages
+
+Revision ID: d57ba9ebb251
+Revises: 675b5321501b
+Create Date: 2024-09-11 10:12:45.826265
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+import models as models
+
+# revision identifiers, used by Alembic.
+revision = 'd57ba9ebb251'
+down_revision = '675b5321501b'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table('messages', schema=None) as batch_op:
+ batch_op.add_column(sa.Column('parent_message_id', models.types.StringUUID(), nullable=True))
+
+ # Set parent_message_id for existing messages to uuid_nil() to distinguish them from new messages with actual parent IDs or NULLs
+ op.execute('UPDATE messages SET parent_message_id = uuid_nil() WHERE parent_message_id IS NULL')
+
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table('messages', schema=None) as batch_op:
+ batch_op.drop_column('parent_message_id')
+
+ # ### end Alembic commands ###
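
After this migration, three states coexist in `messages.parent_message_id`: the nil UUID for rows that predate the column, NULL for new root messages, and a real message id for replies. A sketch of telling them apart (it assumes the `Message` model from `api/models/model.py` and an open SQLAlchemy `session` are available, as they are in the API code):

```python
# Sketch only; Message and session are assumed to exist in the surrounding code.
from uuid import UUID

NIL_UUID = str(UUID(int=0))  # equivalent of PostgreSQL's uuid_nil()

legacy_rows = session.query(Message).filter(Message.parent_message_id == NIL_UUID).count()
root_rows = session.query(Message).filter(Message.parent_message_id.is_(None)).count()
```
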
diff --git a/api/models/model.py b/api/models/model.py
index ae0bc3210b..53940a5a16 100644
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -710,6 +710,7 @@ class Message(db.Model):
answer_tokens = db.Column(db.Integer, nullable=False, server_default=db.text("0"))
answer_unit_price = db.Column(db.Numeric(10, 4), nullable=False)
answer_price_unit = db.Column(db.Numeric(10, 7), nullable=False, server_default=db.text("0.001"))
+ parent_message_id = db.Column(StringUUID, nullable=True)
provider_response_latency = db.Column(db.Float, nullable=False, server_default=db.text("0"))
total_price = db.Column(db.Numeric(10, 7))
currency = db.Column(db.String(255), nullable=False)
diff --git a/api/poetry.lock b/api/poetry.lock
index 28c688cc9c..184cdb9e81 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -616,13 +616,13 @@ files = [
[[package]]
name = "azure-core"
-version = "1.30.2"
+version = "1.31.0"
description = "Microsoft Azure Core Library for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "azure-core-1.30.2.tar.gz", hash = "sha256:a14dc210efcd608821aa472d9fb8e8d035d29b68993819147bc290a8ac224472"},
- {file = "azure_core-1.30.2-py3-none-any.whl", hash = "sha256:cf019c1ca832e96274ae85abd3d9f752397194d9fea3b41487290562ac8abe4a"},
+ {file = "azure_core-1.31.0-py3-none-any.whl", hash = "sha256:22954de3777e0250029360ef31d80448ef1be13b80a459bff80ba7073379e2cd"},
+ {file = "azure_core-1.31.0.tar.gz", hash = "sha256:656a0dd61e1869b1506b7c6a3b31d62f15984b1a573d6326f6aa2f3e4123284b"},
]
[package.dependencies]
@@ -828,13 +828,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
-version = "1.35.17"
+version = "1.35.19"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
files = [
- {file = "botocore-1.35.17-py3-none-any.whl", hash = "sha256:a93f773ca93139529b5d36730b382dbee63ab4c7f26129aa5c84835255ca999d"},
- {file = "botocore-1.35.17.tar.gz", hash = "sha256:0d35d03ea647b5d464c7f77bdab6fb23ae5d49752b13cf97ab84444518c7b1bd"},
+ {file = "botocore-1.35.19-py3-none-any.whl", hash = "sha256:c83f7f0cacfe7c19b109b363ebfa8736e570d24922f16ed371681f58ebab44a9"},
+ {file = "botocore-1.35.19.tar.gz", hash = "sha256:42d6d8db7250cbd7899f786f9861e02cab17dc238f64d6acb976098ed9809625"},
]
[package.dependencies]
@@ -2429,13 +2429,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "fastapi"
-version = "0.114.1"
+version = "0.114.2"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
optional = false
python-versions = ">=3.8"
files = [
- {file = "fastapi-0.114.1-py3-none-any.whl", hash = "sha256:5d4746f6e4b7dff0b4f6b6c6d5445645285f662fe75886e99af7ee2d6b58bb3e"},
- {file = "fastapi-0.114.1.tar.gz", hash = "sha256:1d7bbbeabbaae0acb0c22f0ab0b040f642d3093ca3645f8c876b6f91391861d8"},
+ {file = "fastapi-0.114.2-py3-none-any.whl", hash = "sha256:44474a22913057b1acb973ab90f4b671ba5200482e7622816d79105dcece1ac5"},
+ {file = "fastapi-0.114.2.tar.gz", hash = "sha256:0adb148b62edb09e8c6eeefa3ea934e8f276dabc038c5a82989ea6346050c3da"},
]
[package.dependencies]
@@ -3057,20 +3057,20 @@ tests = ["cython", "hypothesis", "mpmath", "pytest", "setuptools"]
[[package]]
name = "google-ai-generativelanguage"
-version = "0.6.1"
+version = "0.6.9"
description = "Google Ai Generativelanguage API client library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "google-ai-generativelanguage-0.6.1.tar.gz", hash = "sha256:4abf37000718b20c43f4b90672b3ab8850738b02457efffd11f3184e03272ed2"},
- {file = "google_ai_generativelanguage-0.6.1-py3-none-any.whl", hash = "sha256:d2afc991c47663bdf65bd4aabcd89723550b81ad0a6d0be8bfb0160755da4cf0"},
+ {file = "google_ai_generativelanguage-0.6.9-py3-none-any.whl", hash = "sha256:50360cd80015d1a8cc70952e98560f32fa06ddee2e8e9f4b4b98e431dc561e0b"},
+ {file = "google_ai_generativelanguage-0.6.9.tar.gz", hash = "sha256:899f1d3a06efa9739f1cd9d2788070178db33c89d4a76f2e8f4da76f649155fa"},
]
[package.dependencies]
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
proto-plus = ">=1.22.3,<2.0.0dev"
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev"
+protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"
[[package]]
name = "google-api-core"
@@ -3336,16 +3336,16 @@ testing = ["pytest"]
[[package]]
name = "google-generativeai"
-version = "0.5.0"
+version = "0.8.1"
description = "Google Generative AI High level API client library and tools."
optional = false
python-versions = ">=3.9"
files = [
- {file = "google_generativeai-0.5.0-py3-none-any.whl", hash = "sha256:207ed12c6a2eeab549a45abbf5373c82077f62b16030bdb502556c78f6d1b5d2"},
+ {file = "google_generativeai-0.8.1-py3-none-any.whl", hash = "sha256:b031877f24d51af0945207657c085896a0a886eceec7a1cb7029327b0aa6e2f6"},
]
[package.dependencies]
-google-ai-generativelanguage = "0.6.1"
+google-ai-generativelanguage = "0.6.9"
google-api-core = "*"
google-api-python-client = "*"
google-auth = ">=2.15.0"
@@ -3990,15 +3990,18 @@ files = [
[[package]]
name = "idna"
-version = "3.8"
+version = "3.9"
description = "Internationalized Domain Names in Applications (IDNA)"
optional = false
python-versions = ">=3.6"
files = [
- {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"},
- {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"},
+ {file = "idna-3.9-py3-none-any.whl", hash = "sha256:69297d5da0cc9281c77efffb4e730254dd45943f45bbfb461de5991713989b1e"},
+ {file = "idna-3.9.tar.gz", hash = "sha256:e5c5dafde284f26e9e0f28f6ea2d6400abd5ca099864a67f576f3981c6476124"},
]
+[package.extras]
+all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+
[[package]]
name = "importlib-metadata"
version = "6.11.0"
@@ -4132,6 +4135,20 @@ files = [
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
+[[package]]
+name = "jsonlines"
+version = "4.0.0"
+description = "Library with helpers for the jsonlines file format"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"},
+ {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"},
+]
+
+[package.dependencies]
+attrs = ">=19.2.0"
+
[[package]]
name = "jsonpath-ng"
version = "1.6.1"
@@ -4393,13 +4410,13 @@ six = "*"
[[package]]
name = "langfuse"
-version = "2.48.0"
+version = "2.48.1"
description = "A client library for accessing langfuse"
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
- {file = "langfuse-2.48.0-py3-none-any.whl", hash = "sha256:475b047e461f8a45e3c7d81b6a87e0b9e389c489d465b838aa69cbdd16eeacce"},
- {file = "langfuse-2.48.0.tar.gz", hash = "sha256:46e7e6e6e97fe03115a9f95d7f29b3fcd1848a9d1bb34608ebb42a3931919e45"},
+ {file = "langfuse-2.48.1-py3-none-any.whl", hash = "sha256:8661070b6d94ba1d7da92c054f3110b6ecf4489d6e8204a4080f934f3f49ebf2"},
+ {file = "langfuse-2.48.1.tar.gz", hash = "sha256:b8117d90babec6be1bc3303b42e0b71848531eae44118e6e0123d03e7961d0fc"},
]
[package.dependencies]
@@ -4418,13 +4435,13 @@ openai = ["openai (>=0.27.8)"]
[[package]]
name = "langsmith"
-version = "0.1.118"
+version = "0.1.120"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
- {file = "langsmith-0.1.118-py3-none-any.whl", hash = "sha256:f017127b3efb037da5e46ff4f8583e8192e7955191737240c327f3eadc144d7c"},
- {file = "langsmith-0.1.118.tar.gz", hash = "sha256:ff1ca06c92c6081250244ebbce5d0bb347b9d898d2e9b60a13b11f0f0720f09f"},
+ {file = "langsmith-0.1.120-py3-none-any.whl", hash = "sha256:54d2785e301646c0988e0a69ebe4d976488c87b41928b358cb153b6ddd8db62b"},
+ {file = "langsmith-0.1.120.tar.gz", hash = "sha256:25499ca187b41bd89d784b272b97a8d76f60e0e21bdf20336e8a2aa6a9b23ac9"},
]
[package.dependencies]
@@ -4466,6 +4483,24 @@ files = [
{file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"},
]
+[[package]]
+name = "loguru"
+version = "0.7.2"
+description = "Python logging made (stupidly) simple"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
+ {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
+win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
+
[[package]]
name = "lxml"
version = "5.3.0"
@@ -5317,6 +5352,36 @@ plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
+[[package]]
+name = "nomic"
+version = "3.1.2"
+description = "The official Nomic python client."
+optional = false
+python-versions = "*"
+files = [
+ {file = "nomic-3.1.2.tar.gz", hash = "sha256:2de1ab1dcf2429011c92987bb2f1eafe1a3a4901c3185b18f994bf89616f606d"},
+]
+
+[package.dependencies]
+click = "*"
+jsonlines = "*"
+loguru = "*"
+numpy = "*"
+pandas = "*"
+pillow = "*"
+pyarrow = "*"
+pydantic = "*"
+pyjwt = "*"
+requests = "*"
+rich = "*"
+tqdm = "*"
+
+[package.extras]
+all = ["nomic[aws,local]"]
+aws = ["boto3", "sagemaker"]
+dev = ["black (==24.3.0)", "cairosvg", "coverage", "isort", "mkautodoc", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "myst-parser", "nomic[all]", "pandas", "pillow", "pylint", "pyright", "pytest", "pytorch-lightning", "twine"]
+local = ["gpt4all (>=2.5.0,<3)"]
+
[[package]]
name = "novita-client"
version = "0.5.7"
@@ -6232,13 +6297,13 @@ xmp = ["defusedxml"]
[[package]]
name = "platformdirs"
-version = "4.3.2"
+version = "4.3.3"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
files = [
- {file = "platformdirs-4.3.2-py3-none-any.whl", hash = "sha256:eb1c8582560b34ed4ba105009a4badf7f6f85768b30126f351328507b2beb617"},
- {file = "platformdirs-4.3.2.tar.gz", hash = "sha256:9e5e27a08aa095dd127b9f2e764d74254f482fef22b0970773bfba79d091ab8c"},
+ {file = "platformdirs-4.3.3-py3-none-any.whl", hash = "sha256:50a5450e2e84f44539718293cbb1da0a0885c9d14adf21b77bae4e66fc99d9b5"},
+ {file = "platformdirs-4.3.3.tar.gz", hash = "sha256:d4e0b7d8ec176b341fb03cb11ca12d0276faa8c485f9cd218f613840463fc2c0"},
]
[package.extras]
@@ -6248,13 +6313,13 @@ type = ["mypy (>=1.11.2)"]
[[package]]
name = "plotly"
-version = "5.24.0"
+version = "5.24.1"
description = "An open-source, interactive data visualization library for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "plotly-5.24.0-py3-none-any.whl", hash = "sha256:0e54efe52c8cef899f7daa41be9ed97dfb6be622613a2a8f56a86a0634b2b67e"},
- {file = "plotly-5.24.0.tar.gz", hash = "sha256:eae9f4f54448682442c92c1e97148e3ad0c52f0cf86306e1b76daba24add554a"},
+ {file = "plotly-5.24.1-py3-none-any.whl", hash = "sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"},
+ {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"},
]
[package.dependencies]
@@ -7025,15 +7090,18 @@ files = [
[[package]]
name = "pyreadline3"
-version = "3.4.3"
+version = "3.5.2"
description = "A python implementation of GNU readline."
optional = false
-python-versions = "*"
+python-versions = ">=3.8"
files = [
- {file = "pyreadline3-3.4.3-py3-none-any.whl", hash = "sha256:f832c5898f4f9a0f81d48a8c499b39d0179de1a465ea3def1a7e7231840b4ed6"},
- {file = "pyreadline3-3.4.3.tar.gz", hash = "sha256:ebab0baca37f50e2faa1dd99a6da1c75de60e0d68a3b229c134bbd12786250e2"},
+ {file = "pyreadline3-3.5.2-py3-none-any.whl", hash = "sha256:a87d56791e2965b2b187e2ea33dcf664600842c997c0623c95cf8ef07db83de9"},
+ {file = "pyreadline3-3.5.2.tar.gz", hash = "sha256:ba82292e52c5a3bb256b291af0c40b457c1e8699cac9a873abbcaac8aef3a1bb"},
]
+[package.extras]
+dev = ["build", "flake8", "pytest", "twine"]
+
[[package]]
name = "pytest"
version = "8.3.3"
@@ -8778,13 +8846,13 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"]
[[package]]
name = "tencentcloud-sdk-python-common"
-version = "3.0.1230"
+version = "3.0.1231"
description = "Tencent Cloud Common SDK for Python"
optional = false
python-versions = "*"
files = [
- {file = "tencentcloud-sdk-python-common-3.0.1230.tar.gz", hash = "sha256:1e0f3bab80026fcb0083820869239b3f8cf30beb8e00e12c213bdecc75eb7577"},
- {file = "tencentcloud_sdk_python_common-3.0.1230-py2.py3-none-any.whl", hash = "sha256:03616c79685c154c689536a9c823d52b855cf49eada70679826a92aff5afd596"},
+ {file = "tencentcloud-sdk-python-common-3.0.1231.tar.gz", hash = "sha256:22aa281ca2eac511e1615b2953da7c4a0bec87cf93a05a7a15dbb61b23a215ee"},
+ {file = "tencentcloud_sdk_python_common-3.0.1231-py2.py3-none-any.whl", hash = "sha256:bd0f7c4df4b156ec35c8731afa1f498043c7e1cd5d2feb595ee441fdb45a061e"},
]
[package.dependencies]
@@ -8792,17 +8860,17 @@ requests = ">=2.16.0"
[[package]]
name = "tencentcloud-sdk-python-hunyuan"
-version = "3.0.1230"
+version = "3.0.1231"
description = "Tencent Cloud Hunyuan SDK for Python"
optional = false
python-versions = "*"
files = [
- {file = "tencentcloud-sdk-python-hunyuan-3.0.1230.tar.gz", hash = "sha256:900d15cb9dc2217b1282d985898ec7ecf97859351c86c6f7efc74685f08a5f85"},
- {file = "tencentcloud_sdk_python_hunyuan-3.0.1230-py2.py3-none-any.whl", hash = "sha256:604dab0d4d66ea942f23d7980c76b5f0f6af3d68a8374e619331a4dd2910991e"},
+ {file = "tencentcloud-sdk-python-hunyuan-3.0.1231.tar.gz", hash = "sha256:6da12f418f14305b3a6b7bb29b6d95bf4038a6b66b81c0e03b8dafc4f6df99ca"},
+ {file = "tencentcloud_sdk_python_hunyuan-3.0.1231-py2.py3-none-any.whl", hash = "sha256:21ba28f69c34c15e20900be3f2c06376fcaf7e58265f939833c55631f2348792"},
]
[package.dependencies]
-tencentcloud-sdk-python-common = "3.0.1230"
+tencentcloud-sdk-python-common = "3.0.1231"
[[package]]
name = "threadpoolctl"
@@ -9205,13 +9273,13 @@ typing-extensions = ">=3.7.4.3"
[[package]]
name = "types-requests"
-version = "2.32.0.20240907"
+version = "2.32.0.20240914"
description = "Typing stubs for requests"
optional = false
python-versions = ">=3.8"
files = [
- {file = "types-requests-2.32.0.20240907.tar.gz", hash = "sha256:ff33935f061b5e81ec87997e91050f7b4af4f82027a7a7a9d9aaea04a963fdf8"},
- {file = "types_requests-2.32.0.20240907-py3-none-any.whl", hash = "sha256:1d1e79faeaf9d42def77f3c304893dea17a97cae98168ac69f3cb465516ee8da"},
+ {file = "types-requests-2.32.0.20240914.tar.gz", hash = "sha256:2850e178db3919d9bf809e434eef65ba49d0e7e33ac92d588f4a5e295fffd405"},
+ {file = "types_requests-2.32.0.20240914-py3-none-any.whl", hash = "sha256:59c2f673eb55f32a99b2894faf6020e1a9f4a402ad0f192bfee0b64469054310"},
]
[package.dependencies]
@@ -9454,13 +9522,13 @@ files = [
[[package]]
name = "urllib3"
-version = "2.2.2"
+version = "2.2.3"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.8"
files = [
- {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
- {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
+ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
+ {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
]
[package.extras]
@@ -9614,12 +9682,12 @@ files = [
[[package]]
name = "volcengine-python-sdk"
-version = "1.0.100"
+version = "1.0.101"
description = "Volcengine SDK for Python"
optional = false
python-versions = "*"
files = [
- {file = "volcengine-python-sdk-1.0.100.tar.gz", hash = "sha256:cdc194fe3ce51adda6892d2ca1c43edba3300699321dc6c69119c59fc3b28932"},
+ {file = "volcengine-python-sdk-1.0.101.tar.gz", hash = "sha256:1b76e71a6dcf3d5be1b2c058e7d281359e6cca2cc920ffe2567d3115beea1d02"},
]
[package.dependencies]
@@ -9913,6 +9981,20 @@ files = [
beautifulsoup4 = "*"
requests = ">=2.0.0,<3.0.0"
+[[package]]
+name = "win32-setctime"
+version = "1.1.0"
+description = "A small Python utility to set file creation time on Windows"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
[[package]]
name = "wrapt"
version = "1.16.0"
@@ -10008,13 +10090,13 @@ h11 = ">=0.9.0,<1"
[[package]]
name = "xinference-client"
-version = "0.13.3"
+version = "0.15.2"
description = "Client for Xinference"
optional = false
python-versions = "*"
files = [
- {file = "xinference-client-0.13.3.tar.gz", hash = "sha256:822b722100affdff049c27760be7d62ac92de58c87a40d3361066df446ba648f"},
- {file = "xinference_client-0.13.3-py3-none-any.whl", hash = "sha256:f0eff3858b1ebcef2129726f82b09259c177e11db466a7ca23def3d4849c419f"},
+ {file = "xinference-client-0.15.2.tar.gz", hash = "sha256:5c2259bb133148d1cc9bd2b8ec6eb8b5bbeba7f11d6252959f4e6cd79baa53ed"},
+ {file = "xinference_client-0.15.2-py3-none-any.whl", hash = "sha256:b6275adab695e75e75a33e21e0ad212488fc2d5a4d0f693d544c0e78469abbe3"},
]
[package.dependencies]
@@ -10215,13 +10297,13 @@ requests = "*"
[[package]]
name = "zipp"
-version = "3.20.1"
+version = "3.20.2"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "zipp-3.20.1-py3-none-any.whl", hash = "sha256:9960cd8967c8f85a56f920d5d507274e74f9ff813a0ab8889a5b5be2daf44064"},
- {file = "zipp-3.20.1.tar.gz", hash = "sha256:c22b14cc4763c5a5b04134207736c107db42e9d3ef2d9779d465f5f1bcba572b"},
+ {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"},
+ {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"},
]
[package.extras]
@@ -10416,4 +10498,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
-content-hash = "9173a56b2efea12804c980511e1465fba43c7a3d83b1ad284ee149851ed67fc5"
+content-hash = "17c4108d92c415d987f8b437ea3e0484c5601a05bfe175339a8546c93c159bc5"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 8c10f1dad9..41244f516c 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -28,7 +28,6 @@ select = [
"PLR0402", # manual-from-import
"PLR1711", # useless-return
"PLR1714", # repeated-equality-comparison
- "PLR6201", # literal-membership
"RUF019", # unnecessary-key-check
"RUF100", # unused-noqa
"RUF101", # redirected-noqa
@@ -100,6 +99,8 @@ exclude = [
[tool.pytest_env]
OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
+FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa"
+NOMIC_API_KEY = "nk-aaaaaaaaaaaaaaaaaaaa"
AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"
@@ -155,20 +156,20 @@ flask-restful = "~0.3.10"
Flask-SQLAlchemy = "~3.1.1"
gevent = "~23.9.1"
gmpy2 = "~2.2.1"
-google-ai-generativelanguage = "0.6.1"
+google-ai-generativelanguage = "0.6.9"
google-api-core = "2.18.0"
google-api-python-client = "2.90.0"
google-auth = "2.29.0"
google-auth-httplib2 = "0.2.0"
google-cloud-aiplatform = "1.49.0"
google-cloud-storage = "2.16.0"
-google-generativeai = "0.5.0"
+google-generativeai = "0.8.1"
googleapis-common-protos = "1.63.0"
gunicorn = "~22.0.0"
httpx = { version = "~0.27.0", extras = ["socks"] }
huggingface-hub = "~0.16.4"
jieba = "0.42.1"
-langfuse = "^2.36.1"
+langfuse = "^2.48.0"
langsmith = "^0.1.77"
mailchimp-transactional = "~1.0.50"
markdown = "~3.5.1"
@@ -203,7 +204,7 @@ transformers = "~4.35.0"
unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
websocket-client = "~1.7.0"
werkzeug = "~3.0.1"
-xinference-client = "0.13.3"
+xinference-client = "0.15.2"
yarl = "~1.9.4"
zhipuai = "1.0.7"
# Before adding new dependency, consider place it in alphabet order (a-z) and suitable group.
@@ -217,6 +218,7 @@ azure-ai-inference = "^1.0.0b3"
volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"}
oci = "^2.133.0"
tos = "^2.7.1"
+nomic = "^3.1.2"
[tool.poetry.group.indriect.dependencies]
kaleido = "0.2.1"
rank-bm25 = "~0.2.2"
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 30c010ef29..e96f06ed40 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -1100,8 +1100,8 @@ class DocumentService:
DocumentService.data_source_args_validate(args)
DocumentService.process_rule_args_validate(args)
else:
- if ("data_source" not in args and not args["data_source"]) and (
- "process_rule" not in args and not args["process_rule"]
+ if ("data_source" not in args or not args["data_source"]) and (
+ "process_rule" not in args or not args["process_rule"]
):
raise ValueError("Data source or Process rule is required")
else:
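
The operator change above is subtle but real: with `and`, a missing `data_source` key made Python go on to evaluate `args["data_source"]` and raise a KeyError, while a present-but-empty value made the whole check silently pass; with `or`, the check short-circuits safely. A standalone illustration with made-up arguments:

```python
# Illustration only; the argument values below are made up.
args = {"process_rule": {}}


def missing_or_empty_old(key: str, args: dict) -> bool:
    return key not in args and not args[key]   # raises KeyError when the key is absent


def missing_or_empty_new(key: str, args: dict) -> bool:
    return key not in args or not args[key]    # short-circuits when the key is absent


assert missing_or_empty_new("data_source", args) is True   # key absent
assert missing_or_empty_new("process_rule", args) is True  # key present but empty
```
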
diff --git a/api/services/message_service.py b/api/services/message_service.py
index ecb121c36e..f432a77c80 100644
--- a/api/services/message_service.py
+++ b/api/services/message_service.py
@@ -34,6 +34,7 @@ class MessageService:
conversation_id: str,
first_id: Optional[str],
limit: int,
+ order: str = "asc",
) -> InfiniteScrollPagination:
if not user:
return InfiniteScrollPagination(data=[], limit=limit, has_more=False)
@@ -91,7 +92,8 @@ class MessageService:
if rest_count > 0:
has_more = True
- history_messages = list(reversed(history_messages))
+ if order == "asc":
+ history_messages = list(reversed(history_messages))
return InfiniteScrollPagination(data=history_messages, limit=limit, has_more=has_more)
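
The new `order` parameter keeps the rows as they come back from the query (presumably newest first, which is why they used to be reversed unconditionally) and only flips them into ascending order when the caller asks for it. In miniature:

```python
# Miniature of the new ordering behaviour; the message ids are placeholders.
history_messages = ["m3", "m2", "m1"]  # as fetched, newest first
order = "asc"
if order == "asc":
    history_messages = list(reversed(history_messages))
assert history_messages == ["m1", "m2", "m3"]
```
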
diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py
index 7ae1b9f231..2bc48c4185 100644
--- a/api/services/tools/tools_transform_service.py
+++ b/api/services/tools/tools_transform_service.py
@@ -74,12 +74,14 @@ class ToolTransformService:
en_US=provider_controller.identity.description.en_US,
zh_Hans=provider_controller.identity.description.zh_Hans,
pt_BR=provider_controller.identity.description.pt_BR,
+ ja_JP=provider_controller.identity.description.ja_JP,
),
icon=provider_controller.identity.icon,
label=I18nObject(
en_US=provider_controller.identity.label.en_US,
zh_Hans=provider_controller.identity.label.zh_Hans,
pt_BR=provider_controller.identity.label.pt_BR,
+ ja_JP=provider_controller.identity.label.ja_JP,
),
type=ToolProviderType.BUILT_IN,
masked_credentials={},
diff --git a/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
new file mode 100644
index 0000000000..281e866e45
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
@@ -0,0 +1,59 @@
+import os
+from collections.abc import Callable
+from typing import Any, Literal, Union
+
+import pytest
+
+# import monkeypatch
+from _pytest.monkeypatch import MonkeyPatch
+from nomic import embed
+
+
+def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict:
+ texts_len = len(texts)
+
+ foo_embedding_sample = 0.123456
+
+ combined = {
+ "embeddings": [[foo_embedding_sample for _ in range(768)] for _ in range(texts_len)],
+ "usage": {"prompt_tokens": texts_len, "total_tokens": texts_len},
+ "model": model,
+ "inference_mode": "remote",
+ }
+
+ return combined
+
+
+def mock_nomic(
+ monkeypatch: MonkeyPatch,
+ methods: list[Literal["text_embedding"]],
+) -> Callable[[], None]:
+ """
+ mock nomic module
+
+ :param monkeypatch: pytest monkeypatch fixture
+ :return: unpatch function
+ """
+
+ def unpatch() -> None:
+ monkeypatch.undo()
+
+ if "text_embedding" in methods:
+ monkeypatch.setattr(embed, "text", create_embedding)
+
+ return unpatch
+
+
+MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
+
+
+@pytest.fixture
+def setup_nomic_mock(request, monkeypatch):
+ methods = request.param if hasattr(request, "param") else []
+ if MOCK:
+ unpatch = mock_nomic(monkeypatch, methods=methods)
+
+ yield
+
+ if MOCK:
+ unpatch()
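
A test opts in to this mock via indirect parametrization, the same pattern the Fireworks tests below use for `setup_openai_mock`. A usage sketch (the model name is illustrative, and it assumes `MOCK_SWITCH=true` so the patched `embed.text` is used):

```python
# Usage sketch for the fixture above; exercises the mock when MOCK_SWITCH=true.
import pytest

from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock


@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_embed_documents(setup_nomic_mock):
    from nomic import embed

    result = embed.text(texts=["hello", "world"], model="nomic-embed-text-v1.5")
    assert len(result["embeddings"]) == 2
    assert result["usage"]["total_tokens"] == 2
```
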
diff --git a/api/tests/integration_tests/model_runtime/__mock/xinference.py b/api/tests/integration_tests/model_runtime/__mock/xinference.py
index 299523f4f5..8deb50635f 100644
--- a/api/tests/integration_tests/model_runtime/__mock/xinference.py
+++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py
@@ -9,7 +9,6 @@ from requests.exceptions import ConnectionError
from requests.sessions import Session
from xinference_client.client.restful.restful_client import (
Client,
- RESTfulChatglmCppChatModelHandle,
RESTfulChatModelHandle,
RESTfulEmbeddingModelHandle,
RESTfulGenerateModelHandle,
@@ -19,9 +18,7 @@ from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage
class MockXinferenceClass:
- def get_chat_model(
- self: Client, model_uid: str
- ) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
+ def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url):
raise RuntimeError("404 Not Found")
diff --git a/api/tests/integration_tests/model_runtime/fireworks/__init__.py b/api/tests/integration_tests/model_runtime/fireworks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_llm.py b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py
new file mode 100644
index 0000000000..699ca293a2
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py
@@ -0,0 +1,186 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+ AssistantPromptMessage,
+ PromptMessageTool,
+ SystemPromptMessage,
+ UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.llm.llm import FireworksLargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+def test_predefined_models():
+ model = FireworksLargeLanguageModel()
+ model_schemas = model.predefined_models()
+
+ assert len(model_schemas) >= 1
+ assert isinstance(model_schemas[0], AIModelEntity)
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock):
+ model = FireworksLargeLanguageModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+        # use gpt-3.5-turbo as the model name so the patched OpenAI client handles the call
+ model.validate_credentials(model="gpt-3.5-turbo", credentials={"fireworks_api_key": "invalid_key"})
+
+ model.validate_credentials(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ )
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_invoke_chat_model(setup_openai_mock):
+ model = FireworksLargeLanguageModel()
+
+ result = model.invoke(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ prompt_messages=[
+ SystemPromptMessage(
+ content="You are a helpful AI assistant.",
+ ),
+ UserPromptMessage(content="Hello World!"),
+ ],
+ model_parameters={
+ "temperature": 0.0,
+ "top_p": 1.0,
+ "presence_penalty": 0.0,
+ "frequency_penalty": 0.0,
+ "max_tokens": 10,
+ },
+ stop=["How"],
+ stream=False,
+ user="foo",
+ )
+
+ assert isinstance(result, LLMResult)
+ assert len(result.message.content) > 0
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_invoke_chat_model_with_tools(setup_openai_mock):
+ model = FireworksLargeLanguageModel()
+
+ result = model.invoke(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ prompt_messages=[
+ SystemPromptMessage(
+ content="You are a helpful AI assistant.",
+ ),
+ UserPromptMessage(
+ content="what's the weather today in London?",
+ ),
+ ],
+ model_parameters={"temperature": 0.0, "max_tokens": 100},
+ tools=[
+ PromptMessageTool(
+ name="get_weather",
+ description="Determine weather in my location",
+ parameters={
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"},
+ "unit": {"type": "string", "enum": ["c", "f"]},
+ },
+ "required": ["location"],
+ },
+ ),
+ PromptMessageTool(
+ name="get_stock_price",
+ description="Get the current stock price",
+ parameters={
+ "type": "object",
+ "properties": {"symbol": {"type": "string", "description": "The stock symbol"}},
+ "required": ["symbol"],
+ },
+ ),
+ ],
+ stream=False,
+ user="foo",
+ )
+
+ assert isinstance(result, LLMResult)
+ assert isinstance(result.message, AssistantPromptMessage)
+ assert len(result.message.tool_calls) > 0
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock):
+ model = FireworksLargeLanguageModel()
+
+ result = model.invoke(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ prompt_messages=[
+ SystemPromptMessage(
+ content="You are a helpful AI assistant.",
+ ),
+ UserPromptMessage(content="Hello World!"),
+ ],
+ model_parameters={"temperature": 0.0, "max_tokens": 100},
+ stream=True,
+ user="foo",
+ )
+
+ assert isinstance(result, Generator)
+
+ for chunk in result:
+ assert isinstance(chunk, LLMResultChunk)
+ assert isinstance(chunk.delta, LLMResultChunkDelta)
+ assert isinstance(chunk.delta.message, AssistantPromptMessage)
+ assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+ if chunk.delta.finish_reason is not None:
+ assert chunk.delta.usage is not None
+ assert chunk.delta.usage.completion_tokens > 0
+
+
+def test_get_num_tokens():
+ model = FireworksLargeLanguageModel()
+
+ num_tokens = model.get_num_tokens(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ prompt_messages=[UserPromptMessage(content="Hello World!")],
+ )
+
+ assert num_tokens == 10
+
+ num_tokens = model.get_num_tokens(
+ model="accounts/fireworks/models/llama-v3p1-8b-instruct",
+ credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")},
+ prompt_messages=[
+ SystemPromptMessage(
+ content="You are a helpful AI assistant.",
+ ),
+ UserPromptMessage(content="Hello World!"),
+ ],
+ tools=[
+ PromptMessageTool(
+ name="get_weather",
+ description="Determine weather in my location",
+ parameters={
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"},
+ "unit": {"type": "string", "enum": ["c", "f"]},
+ },
+ "required": ["location"],
+ },
+ ),
+ ],
+ )
+
+ assert num_tokens == 77
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_provider.py b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py
new file mode 100644
index 0000000000..a68cf1a1a8
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py
@@ -0,0 +1,17 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.fireworks import FireworksProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
+ provider = FireworksProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={})
+
+ provider.validate_provider_credentials(credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")})
diff --git a/api/tests/integration_tests/model_runtime/nomic/__init__.py b/api/tests/integration_tests/model_runtime/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
new file mode 100644
index 0000000000..52dc96ee95
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
@@ -0,0 +1,62 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": "invalid_key",
+ },
+ )
+
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert result.model == "nomic-embed-text-v1.5"
+ assert len(result.embeddings) == 2
+ assert result.usage.total_tokens == 2
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_get_num_tokens(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_provider.py b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
new file mode 100644
index 0000000000..6cad400c06
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
@@ -0,0 +1,22 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.nomic import NomicAtlasProvider
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_provider_credentials(setup_nomic_mock):
+ provider = NomicAtlasProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={})
+
+ provider.validate_provider_credentials(
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
diff --git a/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py
new file mode 100644
index 0000000000..ba3c1eb5e0
--- /dev/null
+++ b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py
@@ -0,0 +1,91 @@
+from uuid import uuid4
+
+from constants import UUID_NIL
+from core.prompt.utils.extract_thread_messages import extract_thread_messages
+
+
+class TestMessage:
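+    # Minimal stand-in for the Message model: exposes `id` and `parent_message_id`
+    # and supports dict-style access via __getitem__.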
+ def __init__(self, id, parent_message_id):
+ self.id = id
+ self.parent_message_id = parent_message_id
+
+ def __getitem__(self, item):
+ return getattr(self, item)
+
+
+def test_extract_thread_messages_single_message():
+ messages = [TestMessage(str(uuid4()), UUID_NIL)]
+ result = extract_thread_messages(messages)
+ assert len(result) == 1
+ assert result[0] == messages[0]
+
+
+def test_extract_thread_messages_linear_thread():
+ id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4())
+ messages = [
+ TestMessage(id5, id4),
+ TestMessage(id4, id3),
+ TestMessage(id3, id2),
+ TestMessage(id2, id1),
+ TestMessage(id1, UUID_NIL),
+ ]
+ result = extract_thread_messages(messages)
+ assert len(result) == 5
+ assert [msg["id"] for msg in result] == [id5, id4, id3, id2, id1]
+
+
+def test_extract_thread_messages_branched_thread():
+ id1, id2, id3, id4 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4())
+ messages = [
+ TestMessage(id4, id2),
+ TestMessage(id3, id2),
+ TestMessage(id2, id1),
+ TestMessage(id1, UUID_NIL),
+ ]
+ result = extract_thread_messages(messages)
+ assert len(result) == 3
+ assert [msg["id"] for msg in result] == [id4, id2, id1]
+
+
+def test_extract_thread_messages_empty_list():
+ messages = []
+ result = extract_thread_messages(messages)
+ assert len(result) == 0
+
+
+def test_extract_thread_messages_partially_loaded():
+ id0, id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4())
+ messages = [
+ TestMessage(id3, id2),
+ TestMessage(id2, id1),
+ TestMessage(id1, id0),
+ ]
+ result = extract_thread_messages(messages)
+ assert len(result) == 3
+ assert [msg["id"] for msg in result] == [id3, id2, id1]
+
+
+def test_extract_thread_messages_legacy_messages():
+ id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4())
+ messages = [
+ TestMessage(id3, UUID_NIL),
+ TestMessage(id2, UUID_NIL),
+ TestMessage(id1, UUID_NIL),
+ ]
+ result = extract_thread_messages(messages)
+ assert len(result) == 3
+ assert [msg["id"] for msg in result] == [id3, id2, id1]
+
+
+def test_extract_thread_messages_mixed_with_legacy_messages():
+ id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4())
+ messages = [
+ TestMessage(id5, id4),
+ TestMessage(id4, id2),
+ TestMessage(id3, id2),
+ TestMessage(id2, UUID_NIL),
+ TestMessage(id1, UUID_NIL),
+ ]
+ result = extract_thread_messages(messages)
+ assert len(result) == 4
+ assert [msg["id"] for msg in result] == [id5, id4, id2, id1]
diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh
index aba13292ab..4c0083a2de 100755
--- a/dev/pytest/pytest_model_runtime.sh
+++ b/dev/pytest/pytest_model_runtime.sh
@@ -6,5 +6,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
- api/tests/integration_tests/model_runtime/upstage
-
+ api/tests/integration_tests/model_runtime/upstage \
+ api/tests/integration_tests/model_runtime/fireworks \
+ api/tests/integration_tests/model_runtime/nomic
diff --git a/docker-legacy/docker-compose.middleware.yaml b/docker-legacy/docker-compose.middleware.yaml
index fadbb3e608..da54fe33fd 100644
--- a/docker-legacy/docker-compose.middleware.yaml
+++ b/docker-legacy/docker-compose.middleware.yaml
@@ -73,7 +73,7 @@ services:
# ssrf_proxy server
# for more information, please refer to
- # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed
+ # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
ssrf_proxy:
image: ubuntu/squid:latest
restart: always
diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml
index f8c5700cd9..1636bb6a21 100644
--- a/docker-legacy/docker-compose.yaml
+++ b/docker-legacy/docker-compose.yaml
@@ -2,7 +2,7 @@ version: '3'
services:
# API service
api:
- image: langgenius/dify-api:0.8.2
+ image: langgenius/dify-api:0.8.3
restart: always
environment:
# Startup mode, 'api' starts the API server.
@@ -227,7 +227,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
- image: langgenius/dify-api:0.8.2
+ image: langgenius/dify-api:0.8.3
restart: always
environment:
CONSOLE_WEB_URL: ''
@@ -396,7 +396,7 @@ services:
# Frontend web application.
web:
- image: langgenius/dify-web:0.8.2
+ image: langgenius/dify-web:0.8.3
restart: always
environment:
# The base URL of console application api server, refers to the Console base URL of WEB service if console domain is
@@ -500,7 +500,7 @@ services:
# ssrf_proxy server
# for more information, please refer to
- # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed
+ # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
ssrf_proxy:
image: ubuntu/squid:latest
restart: always
diff --git a/docker/.env.example b/docker/.env.example
index c892c15636..7eaaceb928 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -563,6 +563,11 @@ CODE_MAX_STRING_ARRAY_LENGTH=30
CODE_MAX_OBJECT_ARRAY_LENGTH=30
CODE_MAX_NUMBER_ARRAY_LENGTH=1000
+# Workflow runtime configuration
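+# WORKFLOW_MAX_EXECUTION_STEPS caps node executions per workflow run,
+# WORKFLOW_MAX_EXECUTION_TIME caps run duration (assumed to be in seconds),
+# and WORKFLOW_CALL_MAX_DEPTH caps nested workflow call depth.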
+WORKFLOW_MAX_EXECUTION_STEPS=500
+WORKFLOW_MAX_EXECUTION_TIME=1200
+WORKFLOW_CALL_MAX_DEPTH=5
+
# SSRF Proxy server HTTP URL
SSRF_PROXY_HTTP_URL=http://ssrf_proxy:3128
# SSRF Proxy server HTTPS URL
diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml
index 251c62fee1..d7900def73 100644
--- a/docker/docker-compose.middleware.yaml
+++ b/docker/docker-compose.middleware.yaml
@@ -63,7 +63,7 @@ services:
# ssrf_proxy server
# for more information, please refer to
- # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed
+ # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
ssrf_proxy:
image: ubuntu/squid:latest
restart: always
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 0fbc695177..16bef279bc 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -202,13 +202,16 @@ x-shared-env: &shared-api-worker-env
CODE_MAX_STRING_ARRAY_LENGTH: ${CODE_MAX_STRING_ARRAY_LENGTH:-30}
CODE_MAX_OBJECT_ARRAY_LENGTH: ${CODE_MAX_OBJECT_ARRAY_LENGTH:-30}
CODE_MAX_NUMBER_ARRAY_LENGTH: ${CODE_MAX_NUMBER_ARRAY_LENGTH:-1000}
+ WORKFLOW_MAX_EXECUTION_STEPS: ${WORKFLOW_MAX_EXECUTION_STEPS:-500}
+ WORKFLOW_MAX_EXECUTION_TIME: ${WORKFLOW_MAX_EXECUTION_TIME:-1200}
+      WORKFLOW_CALL_MAX_DEPTH: ${WORKFLOW_CALL_MAX_DEPTH:-5}
SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128}
SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}
services:
# API service
api:
- image: langgenius/dify-api:0.8.2
+ image: langgenius/dify-api:0.8.3
restart: always
environment:
# Use the shared environment variables.
@@ -228,7 +231,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
- image: langgenius/dify-api:0.8.2
+ image: langgenius/dify-api:0.8.3
restart: always
environment:
# Use the shared environment variables.
@@ -247,7 +250,7 @@ services:
# Frontend web application.
web:
- image: langgenius/dify-web:0.8.2
+ image: langgenius/dify-web:0.8.3
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
diff --git a/web/app/activate/activateForm.tsx b/web/app/activate/activateForm.tsx
index 3b1eed6f09..8e9691b354 100644
--- a/web/app/activate/activateForm.tsx
+++ b/web/app/activate/activateForm.tsx
@@ -143,6 +143,7 @@ const ActivateForm = () => {
onChange={e => setName(e.target.value)}
placeholder={t('login.namePlaceholder') || ''}
className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'}
+ tabIndex={1}
/>
@@ -159,6 +160,7 @@ const ActivateForm = () => {
onChange={e => setPassword(e.target.value)}
placeholder={t('login.passwordPlaceholder') || ''}
className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'}
+ tabIndex={2}
/>
{t('login.error.passwordInvalid')}
diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx
index 08d06871b2..dc79042525 100644
--- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx
+++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx
@@ -265,7 +265,7 @@ const SettingsModal: FC = ({
diff --git a/web/app/components/develop/template/template_workflow.en.mdx b/web/app/components/develop/template/template_workflow.en.mdx
index 2bd0fe9daf..5c712c2c29 100644
--- a/web/app/components/develop/template/template_workflow.en.mdx
+++ b/web/app/components/develop/template/template_workflow.en.mdx
@@ -424,7 +424,7 @@ Workflow applications offers non-session support and is ideal for translation, a
/>