diff --git a/api/.env.example b/api/.env.example
index 3cff88c400..084ef63c60 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -162,6 +162,8 @@ PGVECTOR_PORT=5433
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=postgres
PGVECTOR_DATABASE=postgres
+PGVECTOR_MIN_CONNECTION=1
+PGVECTOR_MAX_CONNECTION=5
# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
diff --git a/api/configs/middleware/vdb/pgvector_config.py b/api/configs/middleware/vdb/pgvector_config.py
index 395dcaa420..85f5dca7e2 100644
--- a/api/configs/middleware/vdb/pgvector_config.py
+++ b/api/configs/middleware/vdb/pgvector_config.py
@@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings):
description="Name of the PostgreSQL database to connect to",
default=None,
)
+
+ PGVECTOR_MIN_CONNECTION: PositiveInt = Field(
+ description="Min connection of the PostgreSQL database",
+ default=1,
+ )
+
+ PGVECTOR_MAX_CONNECTION: PositiveInt = Field(
+ description="Max connection of the PostgreSQL database",
+ default=5,
+ )
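Note: a minimal sketch of how these two bounds would typically feed a connection pool, assuming the pgvector client builds on psycopg2's `SimpleConnectionPool` (defaults mirror the `.env.example` values above):

```python
import os
from psycopg2 import pool  # assumes psycopg2 is installed

connection_pool = pool.SimpleConnectionPool(
    int(os.environ.get("PGVECTOR_MIN_CONNECTION", 1)),  # connections kept open eagerly
    int(os.environ.get("PGVECTOR_MAX_CONNECTION", 5)),  # hard upper bound
    host=os.environ.get("PGVECTOR_HOST", "localhost"),
    port=int(os.environ.get("PGVECTOR_PORT", 5433)),
    user=os.environ.get("PGVECTOR_USER", "postgres"),
    password=os.environ.get("PGVECTOR_PASSWORD", "postgres"),
    database=os.environ.get("PGVECTOR_DATABASE", "postgres"),
)

conn = connection_pool.getconn()
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")
finally:
    connection_pool.putconn(conn)  # always hand the connection back to the pool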
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index 2c4e5ac607..5a763b3457 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -563,10 +563,10 @@ class DatasetRetrievalSettingApi(Resource):
case (
VectorType.MILVUS
| VectorType.RELYT
- | VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
| VectorType.TENCENT
+ | VectorType.PGVECTO_RS
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (
@@ -577,6 +577,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.ELASTICSEARCH
+ | VectorType.PGVECTOR
):
return {
"retrieval_method": [
diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py
index 4797b69b85..807f09598c 100644
--- a/api/core/entities/provider_configuration.py
+++ b/api/core/entities/provider_configuration.py
@@ -119,7 +119,7 @@ class ProviderConfiguration(BaseModel):
credentials = model_configuration.credentials
break
- if self.custom_configuration.provider:
+ if not credentials and self.custom_configuration.provider:
credentials = self.custom_configuration.provider.credentials
return credentials
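Note: the added `not credentials` guard preserves credentials found by the model-level loop above instead of unconditionally overwriting them with provider-level ones. An illustrative sketch of the intended lookup order (all names below are hypothetical stand-ins, not the module's real entities):

```python
from dataclasses import dataclass

@dataclass
class ModelConfig:  # hypothetical stand-in for a custom model configuration
    model: str
    model_type: str
    credentials: dict

@dataclass
class CustomProvider:  # hypothetical stand-in for provider-level custom config
    credentials: dict

def obtain_credentials(model: str, model_type: str,
                       model_configs: list[ModelConfig],
                       custom_provider: CustomProvider | None) -> dict | None:
    credentials = None
    # 1) prefer credentials attached to a matching custom model configuration
    for cfg in model_configs:
        if cfg.model == model and cfg.model_type == model_type:
            credentials = cfg.credentials
            break
    # 2) fall back to provider-level credentials only when nothing matched --
    #    exactly what the added `not credentials` guard enforces
    if not credentials and custom_provider:
        credentials = custom_provider.credentials
    return credentials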
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
new file mode 100644
index 0000000000..31415a24fa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 11B Vision Instruct
+ en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
new file mode 100644
index 0000000000..c2fd77d256
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-1b-instruct
+label:
+ zh_Hans: Llama 3.2 1B Instruct
+ en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
new file mode 100644
index 0000000000..4b3c459c7b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-3b-instruct
+label:
+ zh_Hans: Llama 3.2 3B Instruct
+ en_US: Llama 3.2 3B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
new file mode 100644
index 0000000000..0aece7455d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 90B Vision Instruct
+ en_US: Llama 3.2 90B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
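Note on `context_length_exceeded_behavior` in the four Llama 3.2 files above: Fireworks accepts this option on its OpenAI-compatible endpoint. A hedged sketch of how such a non-standard option could be forwarded; the `extra_body` passthrough is an assumption about how the runtime sends it, and the key is a placeholder:

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key="fw-...",  # placeholder key
)

response = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
    messages=[{"role": "user", "content": "Describe this image."}],
    max_tokens=512,
    extra_body={"context_length_exceeded_behavior": "truncate"},  # or "error"
)
print(response.choices[0].message.content)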
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
new file mode 100644
index 0000000000..d84e9937e0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-001
+label:
+ en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..2ff70564b2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
index bbc697e934..4e0209890a 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
new file mode 100644
index 0000000000..2aea8149f4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-8b-exp-0924
+label:
+ en_US: Gemini 1.5 Flash 8B 0924
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
index c5695e5dda..faabc5e4d1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
index 24b1c5af8a..a22fcca941 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
new file mode 100644
index 0000000000..dfd55c3a94
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash
+label:
+ en_US: Gemini 1.5 Flash
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
new file mode 100644
index 0000000000..a1feff171d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-001
+label:
+ en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..9ae07a06c5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
index 0a918e0d7b..97c68f7a18 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
index 7452ce46e7..860e4816a1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index b3e1ecf3af..d1bf7d269d 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
new file mode 100644
index 0000000000..bdd70b34a2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro
+label:
+ en_US: Gemini 1.5 Pro
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
index 075e484e46..2d213d56ad 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@@ -27,6 +27,15 @@ parameter_rules:
default: 4096
min: 1
max: 4096
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
index 4e9f59e7da..e2f487c1ee 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@@ -31,6 +31,15 @@ parameter_rules:
max: 2048
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
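Note: every Google model file above gains the same `stream` toggle. A minimal sketch of what that boolean maps to at call time, assuming the google-generativeai SDK the provider already uses (API key and model name are placeholders):

```python
import google.generativeai as genai

genai.configure(api_key="AIza...")  # placeholder key
model = genai.GenerativeModel("gemini-1.5-flash")

stream = True  # the new parameter rule surfaces this toggle
response = model.generate_content("Hello", stream=stream)

if stream:
    for chunk in response:  # chunks arrive incrementally
        print(chunk.text, end="")
else:
    print(response.text)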
diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py
index 3fc6787a44..e686ad08d9 100644
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image
@@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content)
# Create a new ClientManager with tenant's API key
- new_client_manager = client._ClientManager()
+ new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
response = google_model.generate_content(
contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600},
)
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
new file mode 100644
index 0000000000..019d453723
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-11b-text-preview
+label:
+ zh_Hans: Llama 3.2 11B Text (Preview)
+ en_US: Llama 3.2 11B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
new file mode 100644
index 0000000000..a44e4ff508
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-1b-preview
+label:
+ zh_Hans: Llama 3.2 1B Text (Preview)
+ en_US: Llama 3.2 1B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
new file mode 100644
index 0000000000..f2fdd0a05e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-3b-preview
+label:
+ zh_Hans: Llama 3.2 3B Text (Preview)
+ en_US: Llama 3.2 3B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
new file mode 100644
index 0000000000..3b34e7c079
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-90b-text-preview
+label:
+ zh_Hans: Llama 3.2 90B Text (Preview)
+ en_US: Llama 3.2 90B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index ff732e6925..a7ea53e0e9 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]:
# calculate num tokens
- if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
- # transform usage
+ if "prompt_eval_count" in chunk_json:
prompt_tokens = chunk_json["prompt_eval_count"]
- completion_tokens = chunk_json["eval_count"]
else:
- # calculate num tokens
- prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
- completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+ prompt_message_content = prompt_messages[0].content
+ if isinstance(prompt_message_content, str):
+ prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+ else:
+ content_text = ""
+ for message_content in prompt_message_content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ content_text += message_content.data
+ prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
+ completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
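Note: the reworked branch tolerates final chunks that carry `prompt_eval_count` but omit `eval_count`, falling back to a GPT-2 estimate for the missing side. A small sketch of the same fallback against a typical Ollama final chunk (the counter below is a crude stand-in for `self._get_num_tokens_by_gpt2`):

```python
import json

def gpt2_token_count(text: str) -> int:
    # Stand-in for self._get_num_tokens_by_gpt2 in the diff above.
    return max(1, len(text) // 4)

# Typical final chunk from Ollama's streaming API: done=true with eval stats;
# eval_count may be absent, which the new code now tolerates.
chunk_json = json.loads('{"done": true, "prompt_eval_count": 26}')
full_text = "streamed answer accumulated so far"
prompt_text = "original prompt text"

if "prompt_eval_count" in chunk_json:
    prompt_tokens = chunk_json["prompt_eval_count"]
else:
    prompt_tokens = gpt2_token_count(prompt_text)

completion_tokens = chunk_json.get("eval_count", gpt2_token_count(full_text))
print(prompt_tokens, completion_tokens)  # 26 8 (estimate used for completion)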
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index a3e5d0981f..8d1df82140 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,18 +1,17 @@
- Qwen/Qwen2.5-72B-Instruct
-- Qwen/Qwen2.5-Math-72B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-Coder-7B-Instruct
-- deepseek-ai/DeepSeek-V2.5
+- Qwen/Qwen2.5-Math-72B-Instruct
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
+- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
- THUDM/glm-4-9b-chat
-- THUDM/chatglm3-6b
- 01-ai/Yi-1.5-34B-Chat-16K
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
@@ -26,13 +25,4 @@
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2
-- Pro/Qwen/Qwen2-7B-Instruct
-- Pro/Qwen/Qwen2-1.5B-Instruct
-- Pro/THUDM/glm-4-9b-chat
-- Pro/THUDM/chatglm3-6b
-- Pro/01-ai/Yi-1.5-9B-Chat-16K
-- Pro/01-ai/Yi-1.5-6B-Chat
-- Pro/internlm/internlm2_5-7b-chat
-- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
-- Pro/meta-llama/Meta-Llama-3-8B-Instruct
-- Pro/google/gemma-2-9b-it
+- mistralai/Mixtral-8x7B-Instruct-v0.1
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml
new file mode 100644
index 0000000000..d9663582e5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-20b-chat
+label:
+ en_US: internlm/internlm2_5-20b-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1'
+ output: '1'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
index 27664eab6c..89fb153ba0 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -28,3 +28,4 @@ pricing:
output: '0'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
index fd7aada428..2785e7496f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -28,3 +28,4 @@ pricing:
output: '1.26'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml
new file mode 100644
index 0000000000..76526200cc
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml
@@ -0,0 +1,74 @@
+model: Qwen/Qwen2.5-Coder-7B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-Coder-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability tokens to be selected and making the output more diverse; a lower temperature sharpens the distribution, making high-probability tokens more likely and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, with a value of 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The range is (0, 1.0); larger values increase randomness, smaller values increase determinism.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sampling candidate set during generation. For example, with a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. Larger values increase randomness; smaller values increase determinism.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random seed used during generation; it controls the randomness of the model's output. Unsigned 64-bit integers are supported; the default is 1234. With a fixed seed the model will try to produce the same or similar results, but identical output is not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls repetition in generated text. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml
new file mode 100644
index 0000000000..90afa0cfd5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml
@@ -0,0 +1,74 @@
+model: Qwen/Qwen2.5-Math-72B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-Math-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability tokens to be selected and making the output more diverse; a lower temperature sharpens the distribution, making high-probability tokens more likely and the output more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, with a value of 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The range is (0, 1.0); larger values increase randomness, smaller values increase determinism.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sampling candidate set during generation. For example, with a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. Larger values increase randomness; smaller values increase determinism.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random seed used during generation; it controls the randomness of the model's output. Unsigned 64-bit integers are supported; the default is 1234. With a fixed seed the model will try to produce the same or similar results, but identical output is not currently guaranteed.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls repetition in generated text. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
index d0ff443827..34a57d1fc0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus
label:
en_US: farui-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index f90c7f075f..3e3585b30a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel,
)
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
@@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom,
I18nObject,
ModelFeature,
+ ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
+ # Check if the model was added via get_customizable_model_schema
+ if self.get_customizable_model_schema(model, credentials) is not None:
+ # For custom models, tokens are not calculated.
+ return 0
+
if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "")
if model == "farui-plus":
@@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials
:return: AIModelEntity or None
"""
- rules = [
- ParameterRule(
- name="temperature",
- type=ParameterType.FLOAT,
- use_template="temperature",
- label=I18nObject(zh_Hans="温度", en_US="Temperature"),
- ),
- ParameterRule(
- name="top_p",
- type=ParameterType.FLOAT,
- use_template="top_p",
- label=I18nObject(zh_Hans="Top P", en_US="Top P"),
- ),
- ParameterRule(
- name="top_k",
- type=ParameterType.INT,
- min=0,
- max=99,
- label=I18nObject(zh_Hans="top_k", en_US="top_k"),
- ),
- ParameterRule(
- name="max_tokens",
- type=ParameterType.INT,
- min=1,
- max=128000,
- default=1024,
- label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
- ),
- ParameterRule(
- name="seed",
- type=ParameterType.INT,
- default=1234,
- label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
- ),
- ParameterRule(
- name="repetition_penalty",
- type=ParameterType.FLOAT,
- default=1.1,
- label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
- ),
- ]
-
- entity = AIModelEntity(
+ return AIModelEntity(
model=model,
- label=I18nObject(en_US=model),
- fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ label=I18nObject(en_US=model, zh_Hans=model),
model_type=ModelType.LLM,
- model_properties={},
- parameter_rules=rules,
+ features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
+ if credentials.get("function_calling_type") == "tool_call"
+ else [],
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={
+ ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
+ ModelPropertyKey.MODE: LLMMode.CHAT.value,
+ },
+ parameter_rules=[
+ ParameterRule(
+ name="temperature",
+ use_template="temperature",
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="max_tokens",
+ use_template="max_tokens",
+ default=512,
+ min=1,
+ max=int(credentials.get("max_tokens", 1024)),
+ label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="top_p",
+ use_template="top_p",
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="top_k",
+ use_template="top_k",
+ label=I18nObject(en_US="Top K", zh_Hans="Top K"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="frequency_penalty",
+ use_template="frequency_penalty",
+ label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
+ type=ParameterType.FLOAT,
+ ),
+ ],
)
-
- return entity
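Note: the rebuilt schema now derives context size, chat mode, and tool-call features from the stored credentials instead of returning an empty `model_properties`. A hedged usage sketch (credential keys are the ones the new code reads; values and the model name are examples, import paths follow the repo layout shown in this diff):

```python
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.tongyi.llm.llm import TongyiLargeLanguageModel

credentials = {
    "context_size": "32000",               # -> ModelPropertyKey.CONTEXT_SIZE
    "max_tokens": "4096",                  # -> upper bound of the max_tokens rule
    "function_calling_type": "tool_call",  # -> enables the tool-call features
}

entity = TongyiLargeLanguageModel().get_customizable_model_schema("my-custom-qwen", credentials)
assert entity.model_properties[ModelPropertyKey.CONTEXT_SIZE] == 32000
assert entity.parameter_rules[1].max == 4096  # max_tokens rule picks up the credential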
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
index d9792e71ee..64a3f33133 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919
label:
en_US: qwen-coder-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
index 0b03505c45..a4c93f7047 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest
label:
en_US: qwen-coder-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
index 2a6c040853..ff68faed80 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo
label:
en_US: qwen-coder-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
index bad7f4f472..c3dbb3616f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-long
label:
en_US: qwen-long
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
index c14aee1e1e..42fe1f6862 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0816
label:
en_US: qwen-math-plus-0816
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
index 9d74eeca3e..9b6567b8cd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0919
label:
en_US: qwen-math-plus-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
index b8601a969a..b2a2393b36 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-latest
label:
en_US: qwen-math-plus-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
index 4a948be597..63f4b7ff0a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus
label:
en_US: qwen-math-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
index bffe324a96..4da90eec3e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-0919
label:
en_US: qwen-math-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
index 0747e96614..d29f8851dd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-latest
label:
en_US: qwen-math-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
index dffb5557ff..2a8f7f725e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo
label:
en_US: qwen-math-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
index 8ae159f1bf..ef1841b517 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0107
label:
en_US: qwen-max-0107
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
index 93fb37254e..a2ea5df130 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0403, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0403
label:
en_US: qwen-max-0403
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
index a5c9d49609..a467665f11 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0428, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0428
label:
en_US: qwen-max-0428
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
index e4a6dae637..78661eaea0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0919
label:
en_US: qwen-max-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
index 6fae8a7d38..6f4674576b 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-1201
label:
en_US: qwen-max-1201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
index 8e20968859..8b5f005473 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-latest
label:
en_US: qwen-max-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
index 9bc50c73fc..098494ff95 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-longcontext
label:
en_US: qwen-max-longcontext
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
index c6a64dc507..9d0d3f8db3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max
label:
en_US: qwen-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
index 430599300b..0b1a6f81df 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0206; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-0206
label:
en_US: qwen-plus-0206
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
index 906995d2b9..7706005bb5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0624; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-0624
label:
en_US: qwen-plus-0624
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
index b33e725dd0..348276fc08 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0723; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-0723
label:
en_US: qwen-plus-0723
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
index bb394fad81..29f125135e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0806; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-0806
label:
en_US: qwen-plus-0806
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
index 118e304a97..905fa1e102 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0919; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-0919
label:
en_US: qwen-plus-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
index 761312bc38..c7a3549727 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-chat
label:
en_US: qwen-plus-chat
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
index 430872fb31..608f52c296 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-latest; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus-latest
label:
en_US: qwen-plus-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
index f3fce30209..9089e57255 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk
model: qwen-plus
label:
en_US: qwen-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
index 2628d824fe..7ee0d44f2f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0206; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo-0206
label:
en_US: qwen-turbo-0206
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
index 8097459bf0..20a3f7eb64 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0624; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo-0624
label:
en_US: qwen-turbo-0624
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
index e43beeb195..ba73dec363 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0919; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo-0919
label:
en_US: qwen-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
index c30cb7ca10..d785b7fe85 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo-chat
label:
en_US: qwen-turbo-chat
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
index e443d6888b..fe38a4283c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-latest; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo-latest
label:
en_US: qwen-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
index 33f05967c2..215c9ec5fc 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo; for more details,
+# please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub
model: qwen-turbo
label:
en_US: qwen-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
index 63b6074d0d..d80168ffc3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0201
label:
en_US: qwen-vl-max-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
index fd20377002..50e10226a5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0809
label:
en_US: qwen-vl-max-0809
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
index 31a9fb51bb..21b127f56c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max
label:
en_US: qwen-vl-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
index 5f90cf48bc..03cb039d15 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0201
label:
en_US: qwen-vl-plus-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
index 97820c0f3a..67b2b2ebdd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0809
label:
en_US: qwen-vl-plus-0809
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
index 6af36cd6f3..f55764c6c0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus
label:
en_US: qwen-vl-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
index 158e2c7ee1..ea157f42de 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-1.5b-instruct
label:
en_US: qwen2-math-1.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
index e26a6923d1..37052a9233 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-72b-instruct
label:
en_US: qwen2-math-72b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
index 589119b26e..e182f1c27f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-7b-instruct
label:
en_US: qwen2-math-7b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
index dd608fbf76..9e75ccc1f2 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-0.5b-instruct
label:
en_US: qwen2.5-0.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
index 08237b3958..67c9d31243 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-1.5b-instruct
label:
en_US: qwen2.5-1.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
index 640b019703..2a38be921c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-14b-instruct
label:
en_US: qwen2.5-14b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
index 3a90ca7532..e6e4fbf978 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-32b-instruct
label:
en_US: qwen2.5-32b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
index b79755eb9b..8f250379a7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-3b-instruct
label:
en_US: qwen2.5-3b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
index e9dd51a341..bb3cdd6141 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-72b-instruct
label:
en_US: qwen2.5-72b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
index 04f26cf5fe..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
index 04f26cf5fe..7ebeec3953 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -1,6 +1,7 @@
-model: qwen2.5-7b-instruct
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
+model: qwen2.5-coder-7b-instruct
label:
- en_US: qwen2.5-7b-instruct
+ en_US: qwen2.5-coder-7b-instruct
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
index f4303c53d3..52e35d8b50 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v1
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
index f6be3544ed..5bb6a8f424 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v2
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
index 171a379ee2..d8af0e2b63 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v3
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
index fabe6d90e6..1a09c20fd9 100644
--- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
@@ -37,14 +37,51 @@ model_credential_schema:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
- en_US: Enter full model name
- zh_Hans: 输入模型全称
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
credential_form_schemas:
- variable: dashscope_api_key
- required: true
label:
en_US: API Key
type: secret-input
+ required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: '4096'
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+        en_US: Enter your model context size
+ - variable: max_tokens
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ default: '4096'
+ type: text-input
+ show_on:
+ - variable: __model_type
+ value: llm
+ - variable: function_calling_type
+ label:
+ en_US: Function calling
+ type: select
+ required: false
+ default: no_call
+ options:
+ - value: no_call
+ label:
+            en_US: Not Supported
+ zh_Hans: 不支持
+ - value: function_call
+ label:
+            en_US: Supported
+ zh_Hans: 支持
+ show_on:
+ - variable: __model_type
+ value: llm
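
These new fields let a user-added Tongyi model describe its own context window, token limit, and tool-calling support instead of relying on hard-coded metadata. A hedged sketch of how a consumer might interpret the resulting credentials dict; the helper below is illustrative and does not exist in the repo:

```python
# Hypothetical consumer of the new credential fields; key names mirror the
# yaml above, but this function itself is an assumption for illustration.
def read_tongyi_credentials(credentials: dict) -> dict:
    return {
        # text-input values arrive as strings, hence the int() conversions
        "context_size": int(credentials.get("context_size", "4096")),
        "max_tokens": int(credentials.get("max_tokens", "4096")),
        # function_calling_type defaults to no_call per the schema above
        "supports_tools": credentials.get("function_calling_type", "no_call") == "function_call",
    }


print(read_tongyi_credentials({"context_size": "8192", "function_calling_type": "function_call"}))
# {'context_size': 8192, 'max_tokens': 4096, 'supports_tools': True}
```
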
diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
index 612542dab1..6dcd98dcfd 100644
--- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
@@ -40,19 +40,8 @@ class AnalyticdbConfig(BaseModel):
class AnalyticdbVector(BaseVector):
- _instance = None
- _init = False
-
- def __new__(cls, *args, **kwargs):
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
def __init__(self, collection_name: str, config: AnalyticdbConfig):
- # collection_name must be updated every time
self._collection_name = collection_name.lower()
- if AnalyticdbVector._init:
- return
try:
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
@@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector):
self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params())
self._client = Client(self._client_config)
self._initialize()
- AnalyticdbVector._init = True
def _initialize(self) -> None:
cache_key = f"vector_indexing_{self.config.instance_id}"
@@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector):
documents = []
for match in response.body.matches.match:
if match.score > score_threshold:
+ metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
- metadata=json.loads(match.metadata.get("metadata_")),
+ metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
@@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector):
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
vector=match.metadata.get("vector"),
metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def delete(self) -> None:
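
Two fixes land in this file. The removed `__new__`/`_init` singleton meant every dataset shared one cached `AnalyticdbVector`, so a second collection silently reused the first one's state; the search hunks additionally record each match's `score` in metadata and sort results in descending order, since matches above the threshold are not guaranteed to arrive ranked. A minimal sketch of the singleton failure mode (the class name is illustrative):

```python
# Illustrative repro of the removed pattern: every "new" vector store is
# actually the first object, so the collection constructed last wins.
class SingletonVector:
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, collection_name: str):
        self._collection_name = collection_name  # re-runs on every construction


a = SingletonVector("dataset_a")
b = SingletonVector("dataset_b")
assert a is b                              # one shared object for all datasets
assert a._collection_name == "dataset_b"   # dataset_a's handle now points elsewhere
```
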
diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py
index 79879d4f63..d90707ebcd 100644
--- a/api/core/rag/datasource/vdb/pgvector/pgvector.py
+++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py
@@ -23,6 +23,8 @@ class PGVectorConfig(BaseModel):
user: str
password: str
database: str
+ min_connection: int
+ max_connection: int
@model_validator(mode="before")
@classmethod
@@ -37,6 +39,12 @@ class PGVectorConfig(BaseModel):
raise ValueError("config PGVECTOR_PASSWORD is required")
if not values["database"]:
raise ValueError("config PGVECTOR_DATABASE is required")
+ if not values["min_connection"]:
+ raise ValueError("config PGVECTOR_MIN_CONNECTION is required")
+ if not values["max_connection"]:
+ raise ValueError("config PGVECTOR_MAX_CONNECTION is required")
+ if values["min_connection"] > values["max_connection"]:
+            raise ValueError("config PGVECTOR_MIN_CONNECTION should not be greater than PGVECTOR_MAX_CONNECTION")
return values
@@ -61,8 +69,8 @@ class PGVector(BaseVector):
def _create_connection_pool(self, config: PGVectorConfig):
return psycopg2.pool.SimpleConnectionPool(
- 1,
- 5,
+ config.min_connection,
+ config.max_connection,
host=config.host,
port=config.port,
user=config.user,
@@ -213,5 +221,7 @@ class PGVectorFactory(AbstractVectorFactory):
user=dify_config.PGVECTOR_USER,
password=dify_config.PGVECTOR_PASSWORD,
database=dify_config.PGVECTOR_DATABASE,
+ min_connection=dify_config.PGVECTOR_MIN_CONNECTION,
+ max_connection=dify_config.PGVECTOR_MAX_CONNECTION,
),
)
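
The two settings feed straight into `psycopg2`'s `SimpleConnectionPool`, with the validator above guaranteeing `min_connection <= max_connection`. A minimal standalone sketch, with placeholder connection details:

```python
# Minimal sketch of the pool the factory now builds; host and credentials are
# placeholders, and getconn() raises PoolError once max_connection is exceeded.
import psycopg2.pool

pool = psycopg2.pool.SimpleConnectionPool(
    1,  # PGVECTOR_MIN_CONNECTION: connections opened eagerly at startup
    5,  # PGVECTOR_MAX_CONNECTION: hard cap on simultaneously checked-out connections
    host="localhost",
    port=5433,
    user="postgres",
    password="postgres",
    database="postgres",
)

conn = pool.getconn()
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")
        print(cur.fetchone())
finally:
    pool.putconn(conn)  # always hand the connection back to the pool
```
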
diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py
index faa373017b..39e3a7f6cf 100644
--- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py
+++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py
@@ -56,7 +56,7 @@ class TencentVector(BaseVector):
return self._client.create_database(database_name=self._client_config.database)
def get_type(self) -> str:
- return "tencent"
+ return VectorType.TENCENT
def to_index_struct(self) -> dict:
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
diff --git a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py
index 1a0dc7f48b..22e191340d 100644
--- a/api/core/rag/datasource/vdb/vector_base.py
+++ b/api/core/rag/datasource/vdb/vector_base.py
@@ -45,6 +45,7 @@ class BaseVector(ABC):
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
raise NotImplementedError
+ @abstractmethod
def delete(self) -> None:
raise NotImplementedError
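
Unlike `search_by_full_text`, which still fails only when called, `delete()` is now enforced at instantiation time: a `BaseVector` subclass that forgets to implement it cannot be constructed at all. A small sketch of the effect (class names are illustrative):

```python
from abc import ABC, abstractmethod


class BaseVectorSketch(ABC):  # stand-in for BaseVector
    @abstractmethod
    def delete(self) -> None:
        raise NotImplementedError


class IncompleteVector(BaseVectorSketch):
    pass  # forgot to implement delete()


try:
    IncompleteVector()
except TypeError as e:
    print(e)  # Can't instantiate abstract class IncompleteVector ...
```
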
diff --git a/api/core/tools/utils/feishu_api_utils.py b/api/core/tools/utils/feishu_api_utils.py
index ffdb06498f..ce1fd7dc19 100644
--- a/api/core/tools/utils/feishu_api_utils.py
+++ b/api/core/tools/utils/feishu_api_utils.py
@@ -1,9 +1,23 @@
import httpx
+from core.tools.errors import ToolProviderCredentialValidationError
from extensions.ext_redis import redis_client
+def auth(credentials):
+ app_id = credentials.get("app_id")
+ app_secret = credentials.get("app_secret")
+ if not app_id or not app_secret:
+        raise ToolProviderCredentialValidationError("app_id and app_secret are required")
+ try:
+ assert FeishuRequest(app_id, app_secret).tenant_access_token is not None
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(str(e))
+
+
class FeishuRequest:
+ API_BASE_URL = "https://lark-plugin-api.solutionsuite.cn/lark-plugin"
+
def __init__(self, app_id: str, app_secret: str):
self.app_id = app_id
self.app_secret = app_secret
@@ -42,7 +56,7 @@ class FeishuRequest:
"expire": 7200
}
"""
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/access_token/get_tenant_access_token"
+ url = f"{self.API_BASE_URL}/access_token/get_tenant_access_token"
payload = {"app_id": app_id, "app_secret": app_secret}
res = self._send_request(url, require_token=False, payload=payload)
return res
@@ -63,7 +77,7 @@ class FeishuRequest:
"msg": "创建飞书文档成功,请查看"
}
"""
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/create_document"
+ url = f"{self.API_BASE_URL}/document/create_document"
payload = {
"title": title,
"content": content,
@@ -72,13 +86,13 @@ class FeishuRequest:
res = self._send_request(url, payload=payload)
return res.get("data")
- def write_document(self, document_id: str, content: str, position: str = "start") -> dict:
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/write_document"
+ def write_document(self, document_id: str, content: str, position: str = "end") -> dict:
+ url = f"{self.API_BASE_URL}/document/write_document"
payload = {"document_id": document_id, "content": content, "position": position}
res = self._send_request(url, payload=payload)
return res
- def get_document_content(self, document_id: str, mode: str, lang: int = 0) -> dict:
+ def get_document_content(self, document_id: str, mode: str = "markdown", lang: str = "0") -> dict:
"""
API url: https://open.larkoffice.com/document/server-docs/docs/docs/docx-v1/document/raw_content
Example Response:
@@ -95,45 +109,404 @@ class FeishuRequest:
"mode": mode,
"lang": lang,
}
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/get_document_content"
- res = self._send_request(url, method="get", params=params)
+ url = f"{self.API_BASE_URL}/document/get_document_content"
+ res = self._send_request(url, method="GET", params=params)
return res.get("data").get("content")
- def list_document_blocks(self, document_id: str, page_token: str, page_size: int = 500) -> dict:
+ def list_document_blocks(
+ self, document_id: str, page_token: str, user_id_type: str = "open_id", page_size: int = 500
+ ) -> dict:
"""
API url: https://open.larkoffice.com/document/server-docs/docs/docs/docx-v1/document/list
"""
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/list_document_blocks"
params = {
+ "user_id_type": user_id_type,
"document_id": document_id,
"page_size": page_size,
"page_token": page_token,
}
- res = self._send_request(url, method="get", params=params)
+ url = f"{self.API_BASE_URL}/document/list_document_blocks"
+ res = self._send_request(url, method="GET", params=params)
return res.get("data")
def send_bot_message(self, receive_id_type: str, receive_id: str, msg_type: str, content: str) -> dict:
"""
API url: https://open.larkoffice.com/document/server-docs/im-v1/message/create
"""
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/message/send_bot_message"
+ url = f"{self.API_BASE_URL}/message/send_bot_message"
params = {
"receive_id_type": receive_id_type,
}
payload = {
"receive_id": receive_id,
"msg_type": msg_type,
- "content": content,
+ "content": content.strip('"').replace(r"\"", '"').replace(r"\\", "\\"),
}
res = self._send_request(url, params=params, payload=payload)
return res.get("data")
def send_webhook_message(self, webhook: str, msg_type: str, content: str) -> dict:
- url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/message/send_webhook_message"
+ url = f"{self.API_BASE_URL}/message/send_webhook_message"
payload = {
"webhook": webhook,
"msg_type": msg_type,
- "content": content,
+ "content": content.strip('"').replace(r"\"", '"').replace(r"\\", "\\"),
}
res = self._send_request(url, require_token=False, payload=payload)
return res
+
+ def get_chat_messages(
+ self,
+ container_id: str,
+ start_time: str,
+ end_time: str,
+ page_token: str,
+ sort_type: str = "ByCreateTimeAsc",
+ page_size: int = 20,
+ ) -> dict:
+ """
+ API url: https://open.larkoffice.com/document/server-docs/im-v1/message/list
+ """
+ url = f"{self.API_BASE_URL}/message/get_chat_messages"
+ params = {
+ "container_id": container_id,
+ "start_time": start_time,
+ "end_time": end_time,
+ "sort_type": sort_type,
+ "page_token": page_token,
+ "page_size": page_size,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def get_thread_messages(
+ self, container_id: str, page_token: str, sort_type: str = "ByCreateTimeAsc", page_size: int = 20
+ ) -> dict:
+ """
+ API url: https://open.larkoffice.com/document/server-docs/im-v1/message/list
+ """
+ url = f"{self.API_BASE_URL}/message/get_thread_messages"
+ params = {
+ "container_id": container_id,
+ "sort_type": sort_type,
+ "page_token": page_token,
+ "page_size": page_size,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def create_task(self, summary: str, start_time: str, end_time: str, completed_time: str, description: str) -> dict:
+        # Create a task
+ url = f"{self.API_BASE_URL}/task/create_task"
+ payload = {
+ "summary": summary,
+ "start_time": start_time,
+ "end_time": end_time,
+ "completed_at": completed_time,
+ "description": description,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def update_task(
+ self, task_guid: str, summary: str, start_time: str, end_time: str, completed_time: str, description: str
+ ) -> dict:
+        # Update a task
+ url = f"{self.API_BASE_URL}/task/update_task"
+ payload = {
+ "task_guid": task_guid,
+ "summary": summary,
+ "start_time": start_time,
+ "end_time": end_time,
+ "completed_time": completed_time,
+ "description": description,
+ }
+ res = self._send_request(url, method="PATCH", payload=payload)
+ return res.get("data")
+
+ def delete_task(self, task_guid: str) -> dict:
+        # Delete a task
+ url = f"{self.API_BASE_URL}/task/delete_task"
+ payload = {
+ "task_guid": task_guid,
+ }
+ res = self._send_request(url, method="DELETE", payload=payload)
+ return res
+
+ def add_members(self, task_guid: str, member_phone_or_email: str, member_role: str) -> dict:
+        # Add members to a task
+ url = f"{self.API_BASE_URL}/task/add_members"
+ payload = {
+ "task_guid": task_guid,
+ "member_phone_or_email": member_phone_or_email,
+ "member_role": member_role,
+ }
+ res = self._send_request(url, payload=payload)
+ return res
+
+ def get_wiki_nodes(self, space_id: str, parent_node_token: str, page_token: str, page_size: int = 20) -> dict:
+        # List all child nodes of a wiki space
+ url = f"{self.API_BASE_URL}/wiki/get_wiki_nodes"
+ payload = {
+ "space_id": space_id,
+ "parent_node_token": parent_node_token,
+ "page_token": page_token,
+ "page_size": page_size,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def get_primary_calendar(self, user_id_type: str = "open_id") -> dict:
+ url = f"{self.API_BASE_URL}/calendar/get_primary_calendar"
+ params = {
+ "user_id_type": user_id_type,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def create_event(
+ self,
+ summary: str,
+ description: str,
+ start_time: str,
+ end_time: str,
+ attendee_ability: str,
+ need_notification: bool = True,
+ auto_record: bool = False,
+ ) -> dict:
+ url = f"{self.API_BASE_URL}/calendar/create_event"
+ payload = {
+ "summary": summary,
+ "description": description,
+ "need_notification": need_notification,
+ "start_time": start_time,
+ "end_time": end_time,
+ "auto_record": auto_record,
+ "attendee_ability": attendee_ability,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def update_event(
+ self,
+ event_id: str,
+ summary: str,
+ description: str,
+ need_notification: bool,
+ start_time: str,
+ end_time: str,
+ auto_record: bool,
+ ) -> dict:
+ url = f"{self.API_BASE_URL}/calendar/update_event/{event_id}"
+ payload = {}
+ if summary:
+ payload["summary"] = summary
+ if description:
+ payload["description"] = description
+ if start_time:
+ payload["start_time"] = start_time
+ if end_time:
+ payload["end_time"] = end_time
+ if need_notification:
+ payload["need_notification"] = need_notification
+ if auto_record:
+ payload["auto_record"] = auto_record
+ res = self._send_request(url, method="PATCH", payload=payload)
+ return res
+
+ def delete_event(self, event_id: str, need_notification: bool = True) -> dict:
+ url = f"{self.API_BASE_URL}/calendar/delete_event/{event_id}"
+ params = {
+ "need_notification": need_notification,
+ }
+ res = self._send_request(url, method="DELETE", params=params)
+ return res
+
+ def list_events(self, start_time: str, end_time: str, page_token: str, page_size: int = 50) -> dict:
+ url = f"{self.API_BASE_URL}/calendar/list_events"
+ params = {
+ "start_time": start_time,
+ "end_time": end_time,
+ "page_token": page_token,
+ "page_size": page_size,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def search_events(
+ self,
+ query: str,
+ start_time: str,
+ end_time: str,
+ page_token: str,
+ user_id_type: str = "open_id",
+ page_size: int = 20,
+ ) -> dict:
+ url = f"{self.API_BASE_URL}/calendar/search_events"
+ payload = {
+ "query": query,
+ "start_time": start_time,
+ "end_time": end_time,
+ "page_token": page_token,
+ "user_id_type": user_id_type,
+ "page_size": page_size,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def add_event_attendees(self, event_id: str, attendee_phone_or_email: str, need_notification: bool = True) -> dict:
+        # Add attendees to a calendar event
+ url = f"{self.API_BASE_URL}/calendar/add_event_attendees"
+ payload = {
+ "event_id": event_id,
+ "attendee_phone_or_email": attendee_phone_or_email,
+ "need_notification": need_notification,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def create_spreadsheet(
+ self,
+ title: str,
+ folder_token: str,
+ ) -> dict:
+        # Create a spreadsheet
+ url = f"{self.API_BASE_URL}/spreadsheet/create_spreadsheet"
+ payload = {
+ "title": title,
+ "folder_token": folder_token,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def get_spreadsheet(
+ self,
+ spreadsheet_token: str,
+ user_id_type: str = "open_id",
+ ) -> dict:
+        # Get spreadsheet information
+ url = f"{self.API_BASE_URL}/spreadsheet/get_spreadsheet"
+ params = {
+ "spreadsheet_token": spreadsheet_token,
+ "user_id_type": user_id_type,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def list_spreadsheet_sheets(
+ self,
+ spreadsheet_token: str,
+ ) -> dict:
+        # List all sheets in a spreadsheet
+ url = f"{self.API_BASE_URL}/spreadsheet/list_spreadsheet_sheets"
+ params = {
+ "spreadsheet_token": spreadsheet_token,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def add_rows(
+ self,
+ spreadsheet_token: str,
+ sheet_id: str,
+ sheet_name: str,
+ length: int,
+ values: str,
+ ) -> dict:
+        # Add rows, appended at the end of the sheet
+ url = f"{self.API_BASE_URL}/spreadsheet/add_rows"
+ payload = {
+ "spreadsheet_token": spreadsheet_token,
+ "sheet_id": sheet_id,
+ "sheet_name": sheet_name,
+ "length": length,
+ "values": values,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def add_cols(
+ self,
+ spreadsheet_token: str,
+ sheet_id: str,
+ sheet_name: str,
+ length: int,
+ values: str,
+ ) -> dict:
+        # Add columns, appended at the end of the sheet
+ url = f"{self.API_BASE_URL}/spreadsheet/add_cols"
+ payload = {
+ "spreadsheet_token": spreadsheet_token,
+ "sheet_id": sheet_id,
+ "sheet_name": sheet_name,
+ "length": length,
+ "values": values,
+ }
+ res = self._send_request(url, payload=payload)
+ return res.get("data")
+
+ def read_rows(
+ self,
+ spreadsheet_token: str,
+ sheet_id: str,
+ sheet_name: str,
+ start_row: int,
+ num_rows: int,
+ user_id_type: str = "open_id",
+ ) -> dict:
+        # Read row data from a sheet
+ url = f"{self.API_BASE_URL}/spreadsheet/read_rows"
+ params = {
+ "spreadsheet_token": spreadsheet_token,
+ "sheet_id": sheet_id,
+ "sheet_name": sheet_name,
+ "start_row": start_row,
+ "num_rows": num_rows,
+ "user_id_type": user_id_type,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def read_cols(
+ self,
+ spreadsheet_token: str,
+ sheet_id: str,
+ sheet_name: str,
+ start_col: int,
+ num_cols: int,
+ user_id_type: str = "open_id",
+ ) -> dict:
+        # Read column data from a sheet
+ url = f"{self.API_BASE_URL}/spreadsheet/read_cols"
+ params = {
+ "spreadsheet_token": spreadsheet_token,
+ "sheet_id": sheet_id,
+ "sheet_name": sheet_name,
+ "start_col": start_col,
+ "num_cols": num_cols,
+ "user_id_type": user_id_type,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
+
+ def read_table(
+ self,
+ spreadsheet_token: str,
+ sheet_id: str,
+ sheet_name: str,
+ num_range: str,
+ query: str,
+ user_id_type: str = "open_id",
+ ) -> dict:
+        # Read a custom range of rows and columns
+ url = f"{self.API_BASE_URL}/spreadsheet/read_table"
+ params = {
+ "spreadsheet_token": spreadsheet_token,
+ "sheet_id": sheet_id,
+ "sheet_name": sheet_name,
+ "range": num_range,
+ "query": query,
+ "user_id_type": user_id_type,
+ }
+ res = self._send_request(url, method="GET", params=params)
+ return res.get("data")
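
Besides hoisting the base URL into `API_BASE_URL` and adding the task, wiki, calendar, and spreadsheet endpoints, the message senders now strip one level of JSON string escaping from `content`, presumably because upstream values can arrive as a JSON-encoded string rather than raw JSON. A small sketch of what that chain does:

```python
# The chain turns a JSON-encoded *string* back into raw JSON object text.
raw = '"{\\"text\\": \\"hello\\"}"'  # e.g. json.dumps applied twice to {"text": "hello"}
cleaned = raw.strip('"').replace('\\"', '"').replace("\\\\", "\\")
assert cleaned == '{"text": "hello"}'
```

Note that `strip('"')` trims quotes from both ends unconditionally, so content that legitimately begins and ends with a double quote loses them.
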
diff --git a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py
index c5a986b747..72efdc2780 100644
--- a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py
+++ b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py
@@ -18,6 +18,8 @@ class PGVectorTest(AbstractVectorTest):
user="postgres",
password="difyai123456",
database="dify",
+ min_connection=1,
+ max_connection=5,
),
)
diff --git a/docker/.env.example b/docker/.env.example
index f7479791ce..eb05f7aa4f 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region
# ------------------------------
# The type of vector store to use.
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`.
+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -385,12 +385,31 @@ MYSCALE_PASSWORD=
MYSCALE_DATABASE=dify
MYSCALE_FTS_PARAMS=
-# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector`
+# pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=difyai123456
PGVECTOR_DATABASE=dify
+PGVECTOR_MIN_CONNECTION=1
+PGVECTOR_MAX_CONNECTION=5
+
+# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs`
+PGVECTO_RS_HOST=pgvecto-rs
+PGVECTO_RS_PORT=5432
+PGVECTO_RS_USER=postgres
+PGVECTO_RS_PASSWORD=difyai123456
+PGVECTO_RS_DATABASE=dify
+
+# analyticdb configurations, only available when VECTOR_STORE is `analyticdb`
+ANALYTICDB_KEY_ID=your-ak
+ANALYTICDB_KEY_SECRET=your-sk
+ANALYTICDB_REGION_ID=cn-hangzhou
+ANALYTICDB_INSTANCE_ID=gp-ab123456
+ANALYTICDB_ACCOUNT=testaccount
+ANALYTICDB_PASSWORD=testpassword
+ANALYTICDB_NAMESPACE=dify
+ANALYTICDB_NAMESPACE_PASSWORD=difypassword
# TiDB vector configurations, only available when VECTOR_STORE is `tidb`
TIDB_VECTOR_HOST=tidb
diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx
index 90874f50ce..0f18544335 100644
--- a/web/app/activate/page.tsx
+++ b/web/app/activate/page.tsx
@@ -22,7 +22,7 @@ const Activate = () => {