diff --git a/api/.env.example b/api/.env.example index 3cff88c400..084ef63c60 100644 --- a/api/.env.example +++ b/api/.env.example @@ -162,6 +162,8 @@ PGVECTOR_PORT=5433 PGVECTOR_USER=postgres PGVECTOR_PASSWORD=postgres PGVECTOR_DATABASE=postgres +PGVECTOR_MIN_CONNECTION=1 +PGVECTOR_MAX_CONNECTION=5 # Tidb Vector configuration TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com diff --git a/api/configs/middleware/vdb/pgvector_config.py b/api/configs/middleware/vdb/pgvector_config.py index 395dcaa420..85f5dca7e2 100644 --- a/api/configs/middleware/vdb/pgvector_config.py +++ b/api/configs/middleware/vdb/pgvector_config.py @@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings): description="Name of the PostgreSQL database to connect to", default=None, ) + + PGVECTOR_MIN_CONNECTION: PositiveInt = Field( + description="Min connection of the PostgreSQL database", + default=1, + ) + + PGVECTOR_MAX_CONNECTION: PositiveInt = Field( + description="Max connection of the PostgreSQL database", + default=5, + ) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 2c4e5ac607..5a763b3457 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -563,10 +563,10 @@ class DatasetRetrievalSettingApi(Resource): case ( VectorType.MILVUS | VectorType.RELYT - | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT + | VectorType.PGVECTO_RS ): return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]} case ( @@ -577,6 +577,7 @@ class DatasetRetrievalSettingApi(Resource): | VectorType.MYSCALE | VectorType.ORACLE | VectorType.ELASTICSEARCH + | VectorType.PGVECTOR ): return { "retrieval_method": [ diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index 4797b69b85..807f09598c 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -119,7 +119,7 @@ class ProviderConfiguration(BaseModel): credentials = model_configuration.credentials break - if self.custom_configuration.provider: + if not credentials and self.custom_configuration.provider: credentials = self.custom_configuration.provider.credentials return credentials diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml new file mode 100644 index 0000000000..31415a24fa --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct +label: + zh_Hans: Llama 3.2 11B Vision Instruct + en_US: Llama 3.2 11B Vision Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml new file mode 100644 index 0000000000..c2fd77d256 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-1b-instruct +label: + zh_Hans: Llama 3.2 1B Instruct + en_US: Llama 3.2 1B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.1' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml new file mode 100644 index 0000000000..4b3c459c7b --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-3b-instruct +label: + zh_Hans: Llama 3.2 3B Instruct + en_US: Llama 3.2 3B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.1' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml new file mode 100644 index 0000000000..0aece7455d --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct +label: + zh_Hans: Llama 3.2 90B Vision Instruct + en_US: Llama 3.2 90B Vision Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml new file mode 100644 index 0000000000..d84e9937e0 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-001 +label: + en_US: Gemini 1.5 Flash 001 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml new file mode 100644 index 0000000000..2ff70564b2 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-002 +label: + en_US: Gemini 1.5 Flash 002 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml index bbc697e934..4e0209890a 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml new file mode 100644 index 0000000000..2aea8149f4 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-8b-exp-0924 +label: + en_US: Gemini 1.5 Flash 8B 0924 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml index c5695e5dda..faabc5e4d1 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml index 24b1c5af8a..a22fcca941 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-flash-latest label: - en_US: Gemini 1.5 Flash + en_US: Gemini 1.5 Flash Latest model_type: llm features: - agent-thought @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml new file mode 100644 index 0000000000..dfd55c3a94 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash +label: + en_US: Gemini 1.5 Flash +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml new file mode 100644 index 0000000000..a1feff171d --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro-001 +label: + en_US: Gemini 1.5 Pro 001 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml new file mode 100644 index 0000000000..9ae07a06c5 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro-002 +label: + en_US: Gemini 1.5 Pro 002 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml index 0a918e0d7b..97c68f7a18 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml index 7452ce46e7..860e4816a1 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml index b3e1ecf3af..d1bf7d269d 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-pro-latest label: - en_US: Gemini 1.5 Pro + en_US: Gemini 1.5 Pro Latest model_type: llm features: - agent-thought @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml new file mode 100644 index 0000000000..bdd70b34a2 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro +label: + en_US: Gemini 1.5 Pro +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml index 075e484e46..2d213d56ad 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml @@ -27,6 +27,15 @@ parameter_rules: default: 4096 min: 1 max: 4096 + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml index 4e9f59e7da..e2f487c1ee 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml @@ -31,6 +31,15 @@ parameter_rules: max: 2048 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py index 3fc6787a44..e686ad08d9 100644 --- a/api/core/model_runtime/model_providers/google/llm/llm.py +++ b/api/core/model_runtime/model_providers/google/llm/llm.py @@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm import google.generativeai as genai import requests from google.api_core import exceptions -from google.generativeai import client -from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory +from google.generativeai.client import _ClientManager +from google.generativeai.types import ContentType, GenerateContentResponse from google.generativeai.types.content_types import to_part from PIL import Image @@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel): history.append(content) # Create a new ClientManager with tenant's API key - new_client_manager = client._ClientManager() + new_client_manager = _ClientManager() new_client_manager.configure(api_key=credentials["google_api_key"]) new_custom_client = new_client_manager.make_client("generative") google_model._client = new_custom_client - safety_settings = { - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - } - response = google_model.generate_content( contents=history, generation_config=genai.types.GenerationConfig(**config_kwargs), stream=stream, - safety_settings=safety_settings, tools=self._convert_tools_to_glm_tool(tools) if tools else None, request_options={"timeout": 600}, ) diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml new file mode 100644 index 0000000000..019d453723 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-11b-text-preview +label: + zh_Hans: Llama 3.2 11B Text (Preview) + en_US: Llama 3.2 11B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml new file mode 100644 index 0000000000..a44e4ff508 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-1b-preview +label: + zh_Hans: Llama 3.2 1B Text (Preview) + en_US: Llama 3.2 1B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml new file mode 100644 index 0000000000..f2fdd0a05e --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-3b-preview +label: + zh_Hans: Llama 3.2 3B Text (Preview) + en_US: Llama 3.2 3B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml new file mode 100644 index 0000000000..3b34e7c079 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-90b-text-preview +label: + zh_Hans: Llama 3.2 90B Text (Preview) + en_US: Llama 3.2 90B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py index ff732e6925..a7ea53e0e9 100644 --- a/api/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py @@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel): if chunk_json["done"]: # calculate num tokens - if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json: - # transform usage + if "prompt_eval_count" in chunk_json: prompt_tokens = chunk_json["prompt_eval_count"] - completion_tokens = chunk_json["eval_count"] else: - # calculate num tokens - prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) - completion_tokens = self._get_num_tokens_by_gpt2(full_text) + prompt_message_content = prompt_messages[0].content + if isinstance(prompt_message_content, str): + prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content) + else: + content_text = "" + for message_content in prompt_message_content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + content_text += message_content.data + prompt_tokens = self._get_num_tokens_by_gpt2(content_text) + + completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text)) # transform usage usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml index a3e5d0981f..8d1df82140 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml @@ -1,18 +1,17 @@ - Qwen/Qwen2.5-72B-Instruct -- Qwen/Qwen2.5-Math-72B-Instruct - Qwen/Qwen2.5-32B-Instruct - Qwen/Qwen2.5-14B-Instruct - Qwen/Qwen2.5-7B-Instruct - Qwen/Qwen2.5-Coder-7B-Instruct -- deepseek-ai/DeepSeek-V2.5 +- Qwen/Qwen2.5-Math-72B-Instruct - Qwen/Qwen2-72B-Instruct - Qwen/Qwen2-57B-A14B-Instruct - Qwen/Qwen2-7B-Instruct - Qwen/Qwen2-1.5B-Instruct +- deepseek-ai/DeepSeek-V2.5 - deepseek-ai/DeepSeek-V2-Chat - deepseek-ai/DeepSeek-Coder-V2-Instruct - THUDM/glm-4-9b-chat -- THUDM/chatglm3-6b - 01-ai/Yi-1.5-34B-Chat-16K - 01-ai/Yi-1.5-9B-Chat-16K - 01-ai/Yi-1.5-6B-Chat @@ -26,13 +25,4 @@ - google/gemma-2-27b-it - google/gemma-2-9b-it - mistralai/Mistral-7B-Instruct-v0.2 -- Pro/Qwen/Qwen2-7B-Instruct -- Pro/Qwen/Qwen2-1.5B-Instruct -- Pro/THUDM/glm-4-9b-chat -- Pro/THUDM/chatglm3-6b -- Pro/01-ai/Yi-1.5-9B-Chat-16K -- Pro/01-ai/Yi-1.5-6B-Chat -- Pro/internlm/internlm2_5-7b-chat -- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct -- Pro/meta-llama/Meta-Llama-3-8B-Instruct -- Pro/google/gemma-2-9b-it +- mistralai/Mixtral-8x7B-Instruct-v0.1 diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml new file mode 100644 index 0000000000..d9663582e5 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml @@ -0,0 +1,30 @@ +model: internlm/internlm2_5-20b-chat +label: + en_US: internlm/internlm2_5-20b-chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1' + output: '1' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml index 27664eab6c..89fb153ba0 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml @@ -28,3 +28,4 @@ pricing: output: '0' unit: '0.000001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml index fd7aada428..2785e7496f 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml @@ -28,3 +28,4 @@ pricing: output: '1.26' unit: '0.000001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml new file mode 100644 index 0000000000..76526200cc --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-7b-instruct.yaml @@ -0,0 +1,74 @@ +model: Qwen/Qwen2.5-Coder-7B-Instruct +label: + en_US: Qwen/Qwen2.5-Coder-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml new file mode 100644 index 0000000000..90afa0cfd5 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-math-72b-instruct.yaml @@ -0,0 +1,74 @@ +model: Qwen/Qwen2.5-Math-72B-Instruct +label: + en_US: Qwen/Qwen2.5-Math-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml index d0ff443827..34a57d1fc0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: farui-plus label: en_US: farui-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py index f90c7f075f..3e3585b30a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py +++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py @@ -18,7 +18,7 @@ from dashscope.common.error import ( UnsupportedModel, ) -from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, ImagePromptMessageContent, @@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import ( FetchFrom, I18nObject, ModelFeature, + ModelPropertyKey, ModelType, ParameterRule, ParameterType, @@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel): :param tools: tools for tool calling :return: """ + # Check if the model was added via get_customizable_model_schema + if self.get_customizable_model_schema(model, credentials) is not None: + # For custom models, tokens are not calculated. + return 0 + if model in {"qwen-turbo-chat", "qwen-plus-chat"}: model = model.replace("-chat", "") if model == "farui-plus": @@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel): :param credentials: model credentials :return: AIModelEntity or None """ - rules = [ - ParameterRule( - name="temperature", - type=ParameterType.FLOAT, - use_template="temperature", - label=I18nObject(zh_Hans="温度", en_US="Temperature"), - ), - ParameterRule( - name="top_p", - type=ParameterType.FLOAT, - use_template="top_p", - label=I18nObject(zh_Hans="Top P", en_US="Top P"), - ), - ParameterRule( - name="top_k", - type=ParameterType.INT, - min=0, - max=99, - label=I18nObject(zh_Hans="top_k", en_US="top_k"), - ), - ParameterRule( - name="max_tokens", - type=ParameterType.INT, - min=1, - max=128000, - default=1024, - label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"), - ), - ParameterRule( - name="seed", - type=ParameterType.INT, - default=1234, - label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"), - ), - ParameterRule( - name="repetition_penalty", - type=ParameterType.FLOAT, - default=1.1, - label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"), - ), - ] - - entity = AIModelEntity( + return AIModelEntity( model=model, - label=I18nObject(en_US=model), - fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + label=I18nObject(en_US=model, zh_Hans=model), model_type=ModelType.LLM, - model_properties={}, - parameter_rules=rules, + features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL] + if credentials.get("function_calling_type") == "tool_call" + else [], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)), + ModelPropertyKey.MODE: LLMMode.CHAT.value, + }, + parameter_rules=[ + ParameterRule( + name="temperature", + use_template="temperature", + label=I18nObject(en_US="Temperature", zh_Hans="温度"), + type=ParameterType.FLOAT, + ), + ParameterRule( + name="max_tokens", + use_template="max_tokens", + default=512, + min=1, + max=int(credentials.get("max_tokens", 1024)), + label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), + type=ParameterType.INT, + ), + ParameterRule( + name="top_p", + use_template="top_p", + label=I18nObject(en_US="Top P", zh_Hans="Top P"), + type=ParameterType.FLOAT, + ), + ParameterRule( + name="top_k", + use_template="top_k", + label=I18nObject(en_US="Top K", zh_Hans="Top K"), + type=ParameterType.FLOAT, + ), + ParameterRule( + name="frequency_penalty", + use_template="frequency_penalty", + label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"), + type=ParameterType.FLOAT, + ), + ], ) - - return entity diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml index d9792e71ee..64a3f33133 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo-0919 label: en_US: qwen-coder-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml index 0b03505c45..a4c93f7047 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo-latest label: en_US: qwen-coder-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml index 2a6c040853..ff68faed80 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo label: en_US: qwen-coder-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml index bad7f4f472..c3dbb3616f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml @@ -1,4 +1,4 @@ -# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6 +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-long label: en_US: qwen-long diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml index c14aee1e1e..42fe1f6862 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-0816 label: en_US: qwen-math-plus-0816 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml index 9d74eeca3e..9b6567b8cd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-0919 label: en_US: qwen-math-plus-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml index b8601a969a..b2a2393b36 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-latest label: en_US: qwen-math-plus-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml index 4a948be597..63f4b7ff0a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus label: en_US: qwen-math-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml index bffe324a96..4da90eec3e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo-0919 label: en_US: qwen-math-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml index 0747e96614..d29f8851dd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo-latest label: en_US: qwen-math-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml index dffb5557ff..2a8f7f725e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo label: en_US: qwen-math-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml index 8ae159f1bf..ef1841b517 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0107 label: en_US: qwen-max-0107 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml index 93fb37254e..a2ea5df130 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0403, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0403 label: en_US: qwen-max-0403 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml index a5c9d49609..a467665f11 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0428, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0428 label: en_US: qwen-max-0428 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml index e4a6dae637..78661eaea0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0919 label: en_US: qwen-max-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 6fae8a7d38..6f4674576b 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-1201 label: en_US: qwen-max-1201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml index 8e20968859..8b5f005473 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-latest label: en_US: qwen-max-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index 9bc50c73fc..098494ff95 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-longcontext label: en_US: qwen-max-longcontext diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index c6a64dc507..9d0d3f8db3 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max label: en_US: qwen-max diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml index 430599300b..0b1a6f81df 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0206 label: en_US: qwen-plus-0206 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml index 906995d2b9..7706005bb5 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0624 label: en_US: qwen-plus-0624 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml index b33e725dd0..348276fc08 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0723, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0723 label: en_US: qwen-plus-0723 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml index bb394fad81..29f125135e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0806, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0806 label: en_US: qwen-plus-0806 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml index 118e304a97..905fa1e102 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0919 label: en_US: qwen-plus-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml index 761312bc38..c7a3549727 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-chat label: en_US: qwen-plus-chat diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml index 430872fb31..608f52c296 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-latest label: en_US: qwen-plus-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index f3fce30209..9089e57255 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus label: en_US: qwen-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml index 2628d824fe..7ee0d44f2f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -1,3 +1,6 @@ +# this model corresponds to qwen-turbo-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) + model: qwen-turbo-0206 label: en_US: qwen-turbo-0206 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml index 8097459bf0..20a3f7eb64 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-0624 label: en_US: qwen-turbo-0624 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml index e43beeb195..ba73dec363 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-0919 label: en_US: qwen-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml index c30cb7ca10..d785b7fe85 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-chat label: en_US: qwen-turbo-chat diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml index e443d6888b..fe38a4283c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-latest label: en_US: qwen-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index 33f05967c2..215c9ec5fc 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo label: en_US: qwen-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml index 63b6074d0d..d80168ffc3 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max-0201 label: en_US: qwen-vl-max-0201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml index fd20377002..50e10226a5 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max-0809 label: en_US: qwen-vl-max-0809 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml index 31a9fb51bb..21b127f56c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max label: en_US: qwen-vl-max diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml index 5f90cf48bc..03cb039d15 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus-0201 label: en_US: qwen-vl-plus-0201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml index 97820c0f3a..67b2b2ebdd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus-0809 label: en_US: qwen-vl-plus-0809 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml index 6af36cd6f3..f55764c6c0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus label: en_US: qwen-vl-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml index 158e2c7ee1..ea157f42de 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-1.5b-instruct label: en_US: qwen2-math-1.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml index e26a6923d1..37052a9233 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-72b-instruct label: en_US: qwen2-math-72b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml index 589119b26e..e182f1c27f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-7b-instruct label: en_US: qwen2-math-7b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml index dd608fbf76..9e75ccc1f2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-0.5b-instruct label: en_US: qwen2.5-0.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml index 08237b3958..67c9d31243 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-1.5b-instruct label: en_US: qwen2.5-1.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml index 640b019703..2a38be921c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-14b-instruct label: en_US: qwen2.5-14b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml index 3a90ca7532..e6e4fbf978 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-32b-instruct label: en_US: qwen2.5-32b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml index b79755eb9b..8f250379a7 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-3b-instruct label: en_US: qwen2.5-3b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml index e9dd51a341..bb3cdd6141 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-72b-instruct label: en_US: qwen2.5-72b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml index 04f26cf5fe..fdcd3d4275 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-7b-instruct label: en_US: qwen2.5-7b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml index 04f26cf5fe..7ebeec3953 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml @@ -1,6 +1,7 @@ -model: qwen2.5-7b-instruct +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-coder-7b-instruct label: - en_US: qwen2.5-7b-instruct + en_US: qwen2.5-coder-7b-instruct model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml index f4303c53d3..52e35d8b50 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v1 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml index f6be3544ed..5bb6a8f424 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v2 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml index 171a379ee2..d8af0e2b63 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v3 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index fabe6d90e6..1a09c20fd9 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -37,14 +37,51 @@ model_credential_schema: en_US: Model Name zh_Hans: 模型名称 placeholder: - en_US: Enter full model name - zh_Hans: 输入模型全称 + en_US: Enter your model name + zh_Hans: 输入模型名称 credential_form_schemas: - variable: dashscope_api_key - required: true label: en_US: API Key type: secret-input + required: true placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + type: text-input + default: '4096' + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: max_tokens + label: + zh_Hans: 最大 token 上限 + en_US: Upper bound for max tokens + default: '4096' + type: text-input + show_on: + - variable: __model_type + value: llm + - variable: function_calling_type + label: + en_US: Function calling + type: select + required: false + default: no_call + options: + - value: no_call + label: + en_US: Not Support + zh_Hans: 不支持 + - value: function_call + label: + en_US: Support + zh_Hans: 支持 + show_on: + - variable: __model_type + value: llm diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py index 612542dab1..6dcd98dcfd 100644 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py +++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py @@ -40,19 +40,8 @@ class AnalyticdbConfig(BaseModel): class AnalyticdbVector(BaseVector): - _instance = None - _init = False - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - def __init__(self, collection_name: str, config: AnalyticdbConfig): - # collection_name must be updated every time self._collection_name = collection_name.lower() - if AnalyticdbVector._init: - return try: from alibabacloud_gpdb20160503.client import Client from alibabacloud_tea_openapi import models as open_api_models @@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector): self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params()) self._client = Client(self._client_config) self._initialize() - AnalyticdbVector._init = True def _initialize(self) -> None: cache_key = f"vector_indexing_{self.config.instance_id}" @@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector): documents = [] for match in response.body.matches.match: if match.score > score_threshold: + metadata = json.loads(match.metadata.get("metadata_")) + metadata["score"] = match.score doc = Document( page_content=match.metadata.get("page_content"), - metadata=json.loads(match.metadata.get("metadata_")), + metadata=metadata, ) documents.append(doc) + documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True) return documents def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: @@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector): for match in response.body.matches.match: if match.score > score_threshold: metadata = json.loads(match.metadata.get("metadata_")) + metadata["score"] = match.score doc = Document( page_content=match.metadata.get("page_content"), vector=match.metadata.get("vector"), metadata=metadata, ) documents.append(doc) + documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True) return documents def delete(self) -> None: diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 79879d4f63..d90707ebcd 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -23,6 +23,8 @@ class PGVectorConfig(BaseModel): user: str password: str database: str + min_connection: int + max_connection: int @model_validator(mode="before") @classmethod @@ -37,6 +39,12 @@ class PGVectorConfig(BaseModel): raise ValueError("config PGVECTOR_PASSWORD is required") if not values["database"]: raise ValueError("config PGVECTOR_DATABASE is required") + if not values["min_connection"]: + raise ValueError("config PGVECTOR_MIN_CONNECTION is required") + if not values["max_connection"]: + raise ValueError("config PGVECTOR_MAX_CONNECTION is required") + if values["min_connection"] > values["max_connection"]: + raise ValueError("config PGVECTOR_MIN_CONNECTION should less than PGVECTOR_MAX_CONNECTION") return values @@ -61,8 +69,8 @@ class PGVector(BaseVector): def _create_connection_pool(self, config: PGVectorConfig): return psycopg2.pool.SimpleConnectionPool( - 1, - 5, + config.min_connection, + config.max_connection, host=config.host, port=config.port, user=config.user, @@ -213,5 +221,7 @@ class PGVectorFactory(AbstractVectorFactory): user=dify_config.PGVECTOR_USER, password=dify_config.PGVECTOR_PASSWORD, database=dify_config.PGVECTOR_DATABASE, + min_connection=dify_config.PGVECTOR_MIN_CONNECTION, + max_connection=dify_config.PGVECTOR_MAX_CONNECTION, ), ) diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index faa373017b..39e3a7f6cf 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -56,7 +56,7 @@ class TencentVector(BaseVector): return self._client.create_database(database_name=self._client_config.database) def get_type(self) -> str: - return "tencent" + return VectorType.TENCENT def to_index_struct(self) -> dict: return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}} diff --git a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py index 1a0dc7f48b..22e191340d 100644 --- a/api/core/rag/datasource/vdb/vector_base.py +++ b/api/core/rag/datasource/vdb/vector_base.py @@ -45,6 +45,7 @@ class BaseVector(ABC): def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: raise NotImplementedError + @abstractmethod def delete(self) -> None: raise NotImplementedError diff --git a/api/core/tools/utils/feishu_api_utils.py b/api/core/tools/utils/feishu_api_utils.py index ffdb06498f..ce1fd7dc19 100644 --- a/api/core/tools/utils/feishu_api_utils.py +++ b/api/core/tools/utils/feishu_api_utils.py @@ -1,9 +1,23 @@ import httpx +from core.tools.errors import ToolProviderCredentialValidationError from extensions.ext_redis import redis_client +def auth(credentials): + app_id = credentials.get("app_id") + app_secret = credentials.get("app_secret") + if not app_id or not app_secret: + raise ToolProviderCredentialValidationError("app_id and app_secret is required") + try: + assert FeishuRequest(app_id, app_secret).tenant_access_token is not None + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) + + class FeishuRequest: + API_BASE_URL = "https://lark-plugin-api.solutionsuite.cn/lark-plugin" + def __init__(self, app_id: str, app_secret: str): self.app_id = app_id self.app_secret = app_secret @@ -42,7 +56,7 @@ class FeishuRequest: "expire": 7200 } """ - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/access_token/get_tenant_access_token" + url = f"{self.API_BASE_URL}/access_token/get_tenant_access_token" payload = {"app_id": app_id, "app_secret": app_secret} res = self._send_request(url, require_token=False, payload=payload) return res @@ -63,7 +77,7 @@ class FeishuRequest: "msg": "创建飞书文档成功,请查看" } """ - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/create_document" + url = f"{self.API_BASE_URL}/document/create_document" payload = { "title": title, "content": content, @@ -72,13 +86,13 @@ class FeishuRequest: res = self._send_request(url, payload=payload) return res.get("data") - def write_document(self, document_id: str, content: str, position: str = "start") -> dict: - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/write_document" + def write_document(self, document_id: str, content: str, position: str = "end") -> dict: + url = f"{self.API_BASE_URL}/document/write_document" payload = {"document_id": document_id, "content": content, "position": position} res = self._send_request(url, payload=payload) return res - def get_document_content(self, document_id: str, mode: str, lang: int = 0) -> dict: + def get_document_content(self, document_id: str, mode: str = "markdown", lang: str = "0") -> dict: """ API url: https://open.larkoffice.com/document/server-docs/docs/docs/docx-v1/document/raw_content Example Response: @@ -95,45 +109,404 @@ class FeishuRequest: "mode": mode, "lang": lang, } - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/get_document_content" - res = self._send_request(url, method="get", params=params) + url = f"{self.API_BASE_URL}/document/get_document_content" + res = self._send_request(url, method="GET", params=params) return res.get("data").get("content") - def list_document_blocks(self, document_id: str, page_token: str, page_size: int = 500) -> dict: + def list_document_blocks( + self, document_id: str, page_token: str, user_id_type: str = "open_id", page_size: int = 500 + ) -> dict: """ API url: https://open.larkoffice.com/document/server-docs/docs/docs/docx-v1/document/list """ - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/document/list_document_blocks" params = { + "user_id_type": user_id_type, "document_id": document_id, "page_size": page_size, "page_token": page_token, } - res = self._send_request(url, method="get", params=params) + url = f"{self.API_BASE_URL}/document/list_document_blocks" + res = self._send_request(url, method="GET", params=params) return res.get("data") def send_bot_message(self, receive_id_type: str, receive_id: str, msg_type: str, content: str) -> dict: """ API url: https://open.larkoffice.com/document/server-docs/im-v1/message/create """ - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/message/send_bot_message" + url = f"{self.API_BASE_URL}/message/send_bot_message" params = { "receive_id_type": receive_id_type, } payload = { "receive_id": receive_id, "msg_type": msg_type, - "content": content, + "content": content.strip('"').replace(r"\"", '"').replace(r"\\", "\\"), } res = self._send_request(url, params=params, payload=payload) return res.get("data") def send_webhook_message(self, webhook: str, msg_type: str, content: str) -> dict: - url = "https://lark-plugin-api.solutionsuite.cn/lark-plugin/message/send_webhook_message" + url = f"{self.API_BASE_URL}/message/send_webhook_message" payload = { "webhook": webhook, "msg_type": msg_type, - "content": content, + "content": content.strip('"').replace(r"\"", '"').replace(r"\\", "\\"), } res = self._send_request(url, require_token=False, payload=payload) return res + + def get_chat_messages( + self, + container_id: str, + start_time: str, + end_time: str, + page_token: str, + sort_type: str = "ByCreateTimeAsc", + page_size: int = 20, + ) -> dict: + """ + API url: https://open.larkoffice.com/document/server-docs/im-v1/message/list + """ + url = f"{self.API_BASE_URL}/message/get_chat_messages" + params = { + "container_id": container_id, + "start_time": start_time, + "end_time": end_time, + "sort_type": sort_type, + "page_token": page_token, + "page_size": page_size, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def get_thread_messages( + self, container_id: str, page_token: str, sort_type: str = "ByCreateTimeAsc", page_size: int = 20 + ) -> dict: + """ + API url: https://open.larkoffice.com/document/server-docs/im-v1/message/list + """ + url = f"{self.API_BASE_URL}/message/get_thread_messages" + params = { + "container_id": container_id, + "sort_type": sort_type, + "page_token": page_token, + "page_size": page_size, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def create_task(self, summary: str, start_time: str, end_time: str, completed_time: str, description: str) -> dict: + # 创建任务 + url = f"{self.API_BASE_URL}/task/create_task" + payload = { + "summary": summary, + "start_time": start_time, + "end_time": end_time, + "completed_at": completed_time, + "description": description, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def update_task( + self, task_guid: str, summary: str, start_time: str, end_time: str, completed_time: str, description: str + ) -> dict: + # 更新任务 + url = f"{self.API_BASE_URL}/task/update_task" + payload = { + "task_guid": task_guid, + "summary": summary, + "start_time": start_time, + "end_time": end_time, + "completed_time": completed_time, + "description": description, + } + res = self._send_request(url, method="PATCH", payload=payload) + return res.get("data") + + def delete_task(self, task_guid: str) -> dict: + # 删除任务 + url = f"{self.API_BASE_URL}/task/delete_task" + payload = { + "task_guid": task_guid, + } + res = self._send_request(url, method="DELETE", payload=payload) + return res + + def add_members(self, task_guid: str, member_phone_or_email: str, member_role: str) -> dict: + # 删除任务 + url = f"{self.API_BASE_URL}/task/add_members" + payload = { + "task_guid": task_guid, + "member_phone_or_email": member_phone_or_email, + "member_role": member_role, + } + res = self._send_request(url, payload=payload) + return res + + def get_wiki_nodes(self, space_id: str, parent_node_token: str, page_token: str, page_size: int = 20) -> dict: + # 获取知识库全部子节点列表 + url = f"{self.API_BASE_URL}/wiki/get_wiki_nodes" + payload = { + "space_id": space_id, + "parent_node_token": parent_node_token, + "page_token": page_token, + "page_size": page_size, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def get_primary_calendar(self, user_id_type: str = "open_id") -> dict: + url = f"{self.API_BASE_URL}/calendar/get_primary_calendar" + params = { + "user_id_type": user_id_type, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def create_event( + self, + summary: str, + description: str, + start_time: str, + end_time: str, + attendee_ability: str, + need_notification: bool = True, + auto_record: bool = False, + ) -> dict: + url = f"{self.API_BASE_URL}/calendar/create_event" + payload = { + "summary": summary, + "description": description, + "need_notification": need_notification, + "start_time": start_time, + "end_time": end_time, + "auto_record": auto_record, + "attendee_ability": attendee_ability, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def update_event( + self, + event_id: str, + summary: str, + description: str, + need_notification: bool, + start_time: str, + end_time: str, + auto_record: bool, + ) -> dict: + url = f"{self.API_BASE_URL}/calendar/update_event/{event_id}" + payload = {} + if summary: + payload["summary"] = summary + if description: + payload["description"] = description + if start_time: + payload["start_time"] = start_time + if end_time: + payload["end_time"] = end_time + if need_notification: + payload["need_notification"] = need_notification + if auto_record: + payload["auto_record"] = auto_record + res = self._send_request(url, method="PATCH", payload=payload) + return res + + def delete_event(self, event_id: str, need_notification: bool = True) -> dict: + url = f"{self.API_BASE_URL}/calendar/delete_event/{event_id}" + params = { + "need_notification": need_notification, + } + res = self._send_request(url, method="DELETE", params=params) + return res + + def list_events(self, start_time: str, end_time: str, page_token: str, page_size: int = 50) -> dict: + url = f"{self.API_BASE_URL}/calendar/list_events" + params = { + "start_time": start_time, + "end_time": end_time, + "page_token": page_token, + "page_size": page_size, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def search_events( + self, + query: str, + start_time: str, + end_time: str, + page_token: str, + user_id_type: str = "open_id", + page_size: int = 20, + ) -> dict: + url = f"{self.API_BASE_URL}/calendar/search_events" + payload = { + "query": query, + "start_time": start_time, + "end_time": end_time, + "page_token": page_token, + "user_id_type": user_id_type, + "page_size": page_size, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def add_event_attendees(self, event_id: str, attendee_phone_or_email: str, need_notification: bool = True) -> dict: + # 参加日程参会人 + url = f"{self.API_BASE_URL}/calendar/add_event_attendees" + payload = { + "event_id": event_id, + "attendee_phone_or_email": attendee_phone_or_email, + "need_notification": need_notification, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def create_spreadsheet( + self, + title: str, + folder_token: str, + ) -> dict: + # 创建电子表格 + url = f"{self.API_BASE_URL}/spreadsheet/create_spreadsheet" + payload = { + "title": title, + "folder_token": folder_token, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def get_spreadsheet( + self, + spreadsheet_token: str, + user_id_type: str = "open_id", + ) -> dict: + # 获取电子表格信息 + url = f"{self.API_BASE_URL}/spreadsheet/get_spreadsheet" + params = { + "spreadsheet_token": spreadsheet_token, + "user_id_type": user_id_type, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def list_spreadsheet_sheets( + self, + spreadsheet_token: str, + ) -> dict: + # 列出电子表格的所有工作表 + url = f"{self.API_BASE_URL}/spreadsheet/list_spreadsheet_sheets" + params = { + "spreadsheet_token": spreadsheet_token, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def add_rows( + self, + spreadsheet_token: str, + sheet_id: str, + sheet_name: str, + length: int, + values: str, + ) -> dict: + # 增加行,在工作表最后添加 + url = f"{self.API_BASE_URL}/spreadsheet/add_rows" + payload = { + "spreadsheet_token": spreadsheet_token, + "sheet_id": sheet_id, + "sheet_name": sheet_name, + "length": length, + "values": values, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def add_cols( + self, + spreadsheet_token: str, + sheet_id: str, + sheet_name: str, + length: int, + values: str, + ) -> dict: + # 增加列,在工作表最后添加 + url = f"{self.API_BASE_URL}/spreadsheet/add_cols" + payload = { + "spreadsheet_token": spreadsheet_token, + "sheet_id": sheet_id, + "sheet_name": sheet_name, + "length": length, + "values": values, + } + res = self._send_request(url, payload=payload) + return res.get("data") + + def read_rows( + self, + spreadsheet_token: str, + sheet_id: str, + sheet_name: str, + start_row: int, + num_rows: int, + user_id_type: str = "open_id", + ) -> dict: + # 读取工作表行数据 + url = f"{self.API_BASE_URL}/spreadsheet/read_rows" + params = { + "spreadsheet_token": spreadsheet_token, + "sheet_id": sheet_id, + "sheet_name": sheet_name, + "start_row": start_row, + "num_rows": num_rows, + "user_id_type": user_id_type, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def read_cols( + self, + spreadsheet_token: str, + sheet_id: str, + sheet_name: str, + start_col: int, + num_cols: int, + user_id_type: str = "open_id", + ) -> dict: + # 读取工作表列数据 + url = f"{self.API_BASE_URL}/spreadsheet/read_cols" + params = { + "spreadsheet_token": spreadsheet_token, + "sheet_id": sheet_id, + "sheet_name": sheet_name, + "start_col": start_col, + "num_cols": num_cols, + "user_id_type": user_id_type, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") + + def read_table( + self, + spreadsheet_token: str, + sheet_id: str, + sheet_name: str, + num_range: str, + query: str, + user_id_type: str = "open_id", + ) -> dict: + # 自定义读取行列数据 + url = f"{self.API_BASE_URL}/spreadsheet/read_table" + params = { + "spreadsheet_token": spreadsheet_token, + "sheet_id": sheet_id, + "sheet_name": sheet_name, + "range": num_range, + "query": query, + "user_id_type": user_id_type, + } + res = self._send_request(url, method="GET", params=params) + return res.get("data") diff --git a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py index c5a986b747..72efdc2780 100644 --- a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py +++ b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py @@ -18,6 +18,8 @@ class PGVectorTest(AbstractVectorTest): user="postgres", password="difyai123456", database="dify", + min_connection=1, + max_connection=5, ), ) diff --git a/docker/.env.example b/docker/.env.example index f7479791ce..eb05f7aa4f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`. +# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, ``chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`. VECTOR_STORE=weaviate # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`. @@ -385,12 +385,31 @@ MYSCALE_PASSWORD= MYSCALE_DATABASE=dify MYSCALE_FTS_PARAMS= -# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector` +# pgvector configurations, only available when VECTOR_STORE is `pgvector` PGVECTOR_HOST=pgvector PGVECTOR_PORT=5432 PGVECTOR_USER=postgres PGVECTOR_PASSWORD=difyai123456 PGVECTOR_DATABASE=dify +PGVECTOR_MIN_CONNECTION=1 +PGVECTOR_MAX_CONNECTION=5 + +# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs` +PGVECTO_RS_HOST=pgvecto-rs +PGVECTO_RS_PORT=5432 +PGVECTO_RS_USER=postgres +PGVECTO_RS_PASSWORD=difyai123456 +PGVECTO_RS_DATABASE=dify + +# analyticdb configurations, only available when VECTOR_STORE is `analyticdb` +ANALYTICDB_KEY_ID=your-ak +ANALYTICDB_KEY_SECRET=your-sk +ANALYTICDB_REGION_ID=cn-hangzhou +ANALYTICDB_INSTANCE_ID=gp-ab123456 +ANALYTICDB_ACCOUNT=testaccount +ANALYTICDB_PASSWORD=testpassword +ANALYTICDB_NAMESPACE=dify +ANALYTICDB_NAMESPACE_PASSWORD=difypassword # TiDB vector configurations, only available when VECTOR_STORE is `tidb` TIDB_VECTOR_HOST=tidb diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx index 90874f50ce..0f18544335 100644 --- a/web/app/activate/page.tsx +++ b/web/app/activate/page.tsx @@ -22,7 +22,7 @@ const Activate = () => {
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.
diff --git a/web/app/components/app/configuration/config-var/config-modal/index.tsx b/web/app/components/app/configuration/config-var/config-modal/index.tsx index 606280653e..ac1d86c1a2 100644 --- a/web/app/components/app/configuration/config-var/config-modal/index.tsx +++ b/web/app/components/app/configuration/config-var/config-modal/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React, { useCallback, useState } from 'react' +import React, { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import ModalFoot from '../modal-foot' @@ -40,6 +40,12 @@ const ConfigModal: FC = ({ const { t } = useTranslation() const [tempPayload, setTempPayload] = useState(payload || getNewVarInWorkflow('') as any) const { type, label, variable, options, max_length } = tempPayload + const modalRef = useRef(null) + useEffect(() => { + // To fix the first input element auto focus, then directly close modal will raise error + if (isShow) + modalRef.current?.focus() + }, [isShow]) const isStringInput = type === InputVarType.textInput || type === InputVarType.paragraph const checkVariableName = useCallback((value: string) => { @@ -135,7 +141,7 @@ const ConfigModal: FC = ({ isShow={isShow} onClose={onClose} > -
+
diff --git a/web/app/components/app/log/filter.tsx b/web/app/components/app/log/filter.tsx index 0552b44d16..b8d7ca5a36 100644 --- a/web/app/components/app/log/filter.tsx +++ b/web/app/components/app/log/filter.tsx @@ -55,6 +55,8 @@ const Filter: FC = ({ isChatMode, appId, queryParams, setQueryPara className='!w-[300px]' onSelect={ (item) => { + if (!item.value) + return setQueryParams({ ...queryParams, annotation_status: item.value as string }) } } diff --git a/web/app/components/app/workflow-log/filter.tsx b/web/app/components/app/workflow-log/filter.tsx index d239c39d2c..58b5252c07 100644 --- a/web/app/components/app/workflow-log/filter.tsx +++ b/web/app/components/app/workflow-log/filter.tsx @@ -23,6 +23,8 @@ const Filter: FC = ({ queryParams, setQueryParams }: IFilterProps) className='!min-w-[100px]' onSelect={ (item) => { + if (!item.value) + return setQueryParams({ ...queryParams, status: item.value as string }) } } diff --git a/web/app/components/base/chat/chat/answer/index.tsx b/web/app/components/base/chat/chat/answer/index.tsx index 705cd73ddf..8184967edc 100644 --- a/web/app/components/base/chat/chat/answer/index.tsx +++ b/web/app/components/base/chat/chat/answer/index.tsx @@ -85,6 +85,19 @@ const Answer: FC = ({ getContentWidth() }, [responding]) + // Recalculate contentWidth when content changes (e.g., SVG preview/source toggle) + useEffect(() => { + if (!containerRef.current) + return + const resizeObserver = new ResizeObserver(() => { + getContentWidth() + }) + resizeObserver.observe(containerRef.current) + return () => { + resizeObserver.disconnect() + } + }, []) + return (
diff --git a/web/app/components/base/markdown.tsx b/web/app/components/base/markdown.tsx index 443ee3410c..39a399cc9f 100644 --- a/web/app/components/base/markdown.tsx +++ b/web/app/components/base/markdown.tsx @@ -116,59 +116,80 @@ const CodeBlock: CodeComponent = memo(({ inline, className, children, ...props } const match = /language-(\w+)/.exec(className || '') const language = match?.[1] const languageShowName = getCorrectCapitalizationLanguageName(language || '') - let chartData = JSON.parse(String('{"title":{"text":"ECharts error - Wrong JSON format."}}').replace(/\n$/, '')) - if (language === 'echarts') { - try { - chartData = JSON.parse(String(children).replace(/\n$/, '')) + const chartData = useMemo(() => { + if (language === 'echarts') { + try { + return JSON.parse(String(children).replace(/\n$/, '')) + } + catch (error) {} } - catch (error) { - } - } + return JSON.parse('{"title":{"text":"ECharts error - Wrong JSON format."}}') + }, [language, children]) - // Use `useMemo` to ensure that `SyntaxHighlighter` only re-renders when necessary - return useMemo(() => { - return (!inline && match) - ? ( -
-
-
{languageShowName}
-
- {language === 'mermaid' && } - -
-
- {(language === 'mermaid' && isSVG) - ? () - : (language === 'echarts' - ? (
) - : (language === 'svg' - ? () - : ( - {String(children).replace(/\n$/, '')} - )))} + const renderCodeContent = useMemo(() => { + const content = String(children).replace(/\n$/, '') + if (language === 'mermaid' && isSVG) { + return + } + else if (language === 'echarts') { + return ( +
+ + +
) - : ({children}) - }, [chartData, children, className, inline, isSVG, language, languageShowName, match, props]) + } + else if (language === 'svg' && isSVG) { + return ( + + + + ) + } + else { + return ( + + {content} + + ) + } + }, [language, match, props, children, chartData, isSVG]) + + if (inline || !match) + return {children} + + return ( +
+
+
{languageShowName}
+
+ {(['mermaid', 'svg']).includes(language!) && } + +
+
+ {renderCodeContent} +
+ ) }) CodeBlock.displayName = 'CodeBlock' diff --git a/web/app/components/base/svg-gallery/index.tsx b/web/app/components/base/svg-gallery/index.tsx index 81e8e87655..4368df00e9 100644 --- a/web/app/components/base/svg-gallery/index.tsx +++ b/web/app/components/base/svg-gallery/index.tsx @@ -29,7 +29,7 @@ export const SVGRenderer = ({ content }: { content: string }) => { if (svgRef.current) { try { svgRef.current.innerHTML = '' - const draw = SVG().addTo(svgRef.current).size('100%', '100%') + const draw = SVG().addTo(svgRef.current) const parser = new DOMParser() const svgDoc = parser.parseFromString(content, 'image/svg+xml') @@ -40,13 +40,11 @@ export const SVGRenderer = ({ content }: { content: string }) => { const originalWidth = parseInt(svgElement.getAttribute('width') || '400', 10) const originalHeight = parseInt(svgElement.getAttribute('height') || '600', 10) - const scale = Math.min(windowSize.width / originalWidth, windowSize.height / originalHeight, 1) - const scaledWidth = originalWidth * scale - const scaledHeight = originalHeight * scale - draw.size(scaledWidth, scaledHeight) + draw.viewbox(0, 0, originalWidth, originalHeight) + + svgRef.current.style.width = `${Math.min(originalWidth, 298)}px` const rootElement = draw.svg(content) - rootElement.scale(scale) rootElement.click(() => { setImagePreview(svgToDataURL(svgElement as Element)) @@ -54,7 +52,7 @@ export const SVGRenderer = ({ content }: { content: string }) => { } catch (error) { if (svgRef.current) - svgRef.current.innerHTML = 'Error rendering SVG. Wait for the image content to complete.' + svgRef.current.innerHTML = 'Error rendering SVG. Wait for the image content to complete.' } } }, [content, windowSize]) @@ -62,14 +60,14 @@ export const SVGRenderer = ({ content }: { content: string }) => { return ( <>
{imagePreview && ( setImagePreview('')} />)} diff --git a/web/app/components/tools/workflow-tool/configure-button.tsx b/web/app/components/tools/workflow-tool/configure-button.tsx index d2c5142f53..6521410dae 100644 --- a/web/app/components/tools/workflow-tool/configure-button.tsx +++ b/web/app/components/tools/workflow-tool/configure-button.tsx @@ -65,7 +65,7 @@ const WorkflowToolConfigureButton = ({ else { if (item.type === 'paragraph' && param.type !== 'string') return true - if (param.type !== item.type && !(param.type === 'string' && item.type === 'paragraph')) + if (item.type === 'text-input' && param.type !== 'string') return true } } diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts index 994bf4f205..6d856003d4 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts @@ -136,6 +136,8 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { top_k: multipleRetrievalConfig?.top_k || DATASET_DEFAULT.top_k, score_threshold: multipleRetrievalConfig?.score_threshold, reranking_model: multipleRetrievalConfig?.reranking_model, + reranking_mode: multipleRetrievalConfig?.reranking_mode, + weights: multipleRetrievalConfig?.weights, } }) setInputs(newInput) diff --git a/web/app/components/workflow/panel/chat-record/index.tsx b/web/app/components/workflow/panel/chat-record/index.tsx index 1bcfd6474d..16d2c304a7 100644 --- a/web/app/components/workflow/panel/chat-record/index.tsx +++ b/web/app/components/workflow/panel/chat-record/index.tsx @@ -90,7 +90,7 @@ const ChatRecord = () => { return (
{ supportCitationHitInfo: true, } as any} chatList={chatList} - chatContainerClassName='px-4' + chatContainerClassName='px-3' chatContainerInnerClassName='pt-6 w-full max-w-full mx-auto' chatFooterClassName='px-4 rounded-b-2xl' chatFooterInnerClassName='pb-4 w-full max-w-full mx-auto' @@ -129,6 +129,8 @@ const ChatRecord = () => { noChatInput allToolIcons={{}} showPromptLog + noSpacing + chatAnswerContainerInner='!pr-2' />
diff --git a/web/app/forgot-password/page.tsx b/web/app/forgot-password/page.tsx index fa44d1a20c..bb46011c06 100644 --- a/web/app/forgot-password/page.tsx +++ b/web/app/forgot-password/page.tsx @@ -28,7 +28,7 @@ const ForgotPassword = () => {
{token ? : }
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.
diff --git a/web/app/install/page.tsx b/web/app/install/page.tsx index 9fa38dd15e..395fae34ec 100644 --- a/web/app/install/page.tsx +++ b/web/app/install/page.tsx @@ -22,7 +22,7 @@ const Install = () => {
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.