diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 21ec0d5fa4..965831ebe3 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,6 +12,8 @@ Please delete options that are not relevant. - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] This change requires a documentation update, included: [Dify Document](https://github.com/langgenius/dify-docs) +- [ ] Improvement, including but not limited to code refactoring, performance optimization, and UI/UX improvement +- [ ] Dependency upgrade # How Has This Been Tested? diff --git a/api/core/model_runtime/model_providers/anthropic/llm/llm.py b/api/core/model_runtime/model_providers/anthropic/llm/llm.py index ad74179353..724a0401b7 100644 --- a/api/core/model_runtime/model_providers/anthropic/llm/llm.py +++ b/api/core/model_runtime/model_providers/anthropic/llm/llm.py @@ -342,12 +342,20 @@ class AnthropicLargeLanguageModel(LargeLanguageModel): Convert prompt messages to dict list and system """ system = "" - prompt_message_dicts = [] - + first_loop = True for message in prompt_messages: if isinstance(message, SystemPromptMessage): - system += message.content + ("\n" if not system else "") - else: + message.content=message.content.strip() + if first_loop: + system=message.content + first_loop=False + else: + system+="\n" + system+=message.content + + prompt_message_dicts = [] + for message in prompt_messages: + if not isinstance(message, SystemPromptMessage): prompt_message_dicts.append(self._convert_prompt_message_to_dict(message)) return system, prompt_message_dicts diff --git a/api/core/model_runtime/model_providers/azure_openai/_constant.py b/api/core/model_runtime/model_providers/azure_openai/_constant.py index 4aa767fa1d..e81a120fa0 100644 --- a/api/core/model_runtime/model_providers/azure_openai/_constant.py +++
b/api/core/model_runtime/model_providers/azure_openai/_constant.py @@ -123,6 +123,65 @@ LLM_BASE_MODELS = [ ) ) ), + AzureBaseModel( + base_model_name='gpt-35-turbo-0125', + entity=AIModelEntity( + model='fake-deployment-name', + label=I18nObject( + en_US='fake-deployment-name-label', + ), + model_type=ModelType.LLM, + features=[ + ModelFeature.AGENT_THOUGHT, + ModelFeature.MULTI_TOOL_CALL, + ModelFeature.STREAM_TOOL_CALL, + ], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.MODE: LLMMode.CHAT.value, + ModelPropertyKey.CONTEXT_SIZE: 16385, + }, + parameter_rules=[ + ParameterRule( + name='temperature', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name='top_p', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( + name='presence_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY], + ), + ParameterRule( + name='frequency_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY], + ), + _get_max_tokens(default=512, min_val=1, max_val=4096), + ParameterRule( + name='response_format', + label=I18nObject( + zh_Hans='回复格式', + en_US='response_format' + ), + type='string', + help=I18nObject( + zh_Hans='指定模型必须输出的格式', + en_US='specifying the format that the model must output' + ), + required=False, + options=['text', 'json_object'] + ), + ], + pricing=PriceConfig( + input=0.0005, + output=0.0015, + unit=0.001, + currency='USD', + ) + ) + ), AzureBaseModel( base_model_name='gpt-4', entity=AIModelEntity( @@ -273,6 +332,81 @@ LLM_BASE_MODELS = [ ) ) ), + AzureBaseModel( + base_model_name='gpt-4-0125-preview', + entity=AIModelEntity( + model='fake-deployment-name', + label=I18nObject( + en_US='fake-deployment-name-label', + ), + model_type=ModelType.LLM, + features=[ + ModelFeature.AGENT_THOUGHT, + ModelFeature.MULTI_TOOL_CALL, + ModelFeature.STREAM_TOOL_CALL, + ], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + 
ModelPropertyKey.MODE: LLMMode.CHAT.value, + ModelPropertyKey.CONTEXT_SIZE: 128000, + }, + parameter_rules=[ + ParameterRule( + name='temperature', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name='top_p', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( + name='presence_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY], + ), + ParameterRule( + name='frequency_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY], + ), + _get_max_tokens(default=512, min_val=1, max_val=4096), + ParameterRule( + name='seed', + label=I18nObject( + zh_Hans='种子', + en_US='Seed' + ), + type='int', + help=I18nObject( + zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。', + en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.' 
+ ), + required=False, + precision=2, + min=0, + max=1, + ), + ParameterRule( + name='response_format', + label=I18nObject( + zh_Hans='回复格式', + en_US='response_format' + ), + type='string', + help=I18nObject( + zh_Hans='指定模型必须输出的格式', + en_US='specifying the format that the model must output' + ), + required=False, + options=['text', 'json_object'] + ), + ], + pricing=PriceConfig( + input=0.01, + output=0.03, + unit=0.001, + currency='USD', + ) + ) + ), AzureBaseModel( base_model_name='gpt-4-1106-preview', entity=AIModelEntity( diff --git a/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml b/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml index 224f2a08a1..792d051d94 100644 --- a/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml +++ b/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml @@ -75,6 +75,12 @@ model_credential_schema: show_on: - variable: __model_type value: llm + - label: + en_US: gpt-35-turbo-0125 + value: gpt-35-turbo-0125 + show_on: + - variable: __model_type + value: llm - label: en_US: gpt-35-turbo-16k value: gpt-35-turbo-16k @@ -93,6 +99,12 @@ model_credential_schema: show_on: - variable: __model_type value: llm + - label: + en_US: gpt-4-0125-preview + value: gpt-4-0125-preview + show_on: + - variable: __model_type + value: llm - label: en_US: gpt-4-1106-preview value: gpt-4-1106-preview diff --git a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py index 535714f663..5ae90d54b5 100644 --- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py @@ -124,7 +124,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel): elif err == 'insufficient_quota': raise InsufficientAccountBalance(msg) elif err == 'invalid_authentication': - raise 
InvalidAuthenticationError(msg) + raise InvalidAuthenticationError(msg) elif err and 'rate' in err: raise RateLimitReachedError(msg) elif err and 'internal' in err: diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml index 05cd402d4e..e1923f8f8a 100644 --- a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml +++ b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml @@ -48,23 +48,23 @@ provider_credential_schema: - value: us-east-1 label: en_US: US East (N. Virginia) - zh_Hans: US East (N. Virginia) + zh_Hans: 美国东部 (弗吉尼亚北部) - value: us-west-2 label: en_US: US West (Oregon) - zh_Hans: US West (Oregon) + zh_Hans: 美国西部 (俄勒冈州) - value: ap-southeast-1 label: en_US: Asia Pacific (Singapore) - zh_Hans: Asia Pacific (Singapore) + zh_Hans: 亚太地区 (新加坡) - value: ap-northeast-1 label: en_US: Asia Pacific (Tokyo) - zh_Hans: Asia Pacific (Tokyo) + zh_Hans: 亚太地区 (东京) - value: eu-central-1 label: en_US: Europe (Frankfurt) - zh_Hans: Europe (Frankfurt) + zh_Hans: 欧洲 (法兰克福) - value: us-gov-west-1 label: en_US: AWS GovCloud (US-West) diff --git a/api/core/model_runtime/model_providers/bedrock/llm/_position.yaml b/api/core/model_runtime/model_providers/bedrock/llm/_position.yaml index c4be732f2e..a4cfbd171e 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/bedrock/llm/_position.yaml @@ -4,6 +4,8 @@ - anthropic.claude-v1 - anthropic.claude-v2 - anthropic.claude-v2:1 +- anthropic.claude-3-sonnet-v1:0 +- anthropic.claude-3-haiku-v1:0 - cohere.command-light-text-v14 - cohere.command-text-v14 - meta.llama2-13b-chat-v1 diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml new file mode 100644 index 0000000000..73fe5567fc --- /dev/null +++ 
b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml @@ -0,0 +1,57 @@ +model: anthropic.claude-3-haiku-20240307-v1:0 +label: + en_US: Claude 3 Haiku +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 200000 +# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html +parameter_rules: + - name: max_tokens + use_template: max_tokens + required: true + type: int + default: 4096 + min: 1 + max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + # docs: https://docs.anthropic.com/claude/docs/system-prompts + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # tip docs from aws has error, max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. 
Use top_k to remove long tail low probability responses. +pricing: + input: '0.00025' + output: '0.00125' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml new file mode 100644 index 0000000000..cb11df0b60 --- /dev/null +++ b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml @@ -0,0 +1,56 @@ +model: anthropic.claude-3-sonnet-20240229-v1:0 +label: + en_US: Claude 3 Sonnet +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 200000 +# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html +parameter_rules: + - name: max_tokens + use_template: max_tokens + required: true + type: int + default: 4096 + min: 1 + max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p.
You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # tip docs from aws has error, max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses. +pricing: + input: '0.003' + output: '0.015' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/bedrock/llm/llm.py b/api/core/model_runtime/model_providers/bedrock/llm/llm.py index c6aaa24ade..b274cec35f 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/llm.py +++ b/api/core/model_runtime/model_providers/bedrock/llm/llm.py @@ -1,9 +1,22 @@ +import base64 import json import logging +import mimetypes +import time from collections.abc import Generator -from typing import Optional, Union +from typing import Optional, Union, cast import boto3 +import requests +from anthropic import AnthropicBedrock, Stream +from anthropic.types import ( + ContentBlockDeltaEvent, + Message, + MessageDeltaEvent, + MessageStartEvent, + MessageStopEvent, + MessageStreamEvent, +) from botocore.config import Config from botocore.exceptions import ( ClientError, @@ -13,14 +26,18 @@ from botocore.exceptions import ( UnknownServiceError, ) -from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, + ImagePromptMessageContent, PromptMessage, + PromptMessageContentType, PromptMessageTool, SystemPromptMessage, + TextPromptMessageContent, UserPromptMessage, ) +from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.errors.invoke import ( InvokeAuthorizationError, InvokeBadRequestError, @@ -54,9 +71,293 @@ class
BedrockLargeLanguageModel(LargeLanguageModel): :param user: unique user id :return: full response or stream response chunk generator result """ + + # invoke claude 3 models via anthropic official SDK + if "anthropic.claude-3" in model: + return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user) # invoke model return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user) + def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, + stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]: + """ + Invoke Claude3 large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param stop: stop words + :param stream: is stream response + :return: full response or stream response chunk generator result + """ + # use Anthropic official SDK references + # - https://docs.anthropic.com/claude/reference/claude-on-amazon-bedrock + # - https://github.com/anthropics/anthropic-sdk-python + client = AnthropicBedrock( + aws_access_key=credentials["aws_access_key_id"], + aws_secret_key=credentials["aws_secret_access_key"], + aws_region=credentials["aws_region"], + ) + + extra_model_kwargs = {} + if stop: + extra_model_kwargs['stop_sequences'] = stop + + # Notice: If you request the current version of the SDK to the bedrock server, + # you will get the following error message and you need to wait for the service or SDK to be updated. 
+ # Response: Error code: 400 + # {'message': 'Malformed input request: #: subject must not be valid against schema + # {"required":["messages"]}#: extraneous key [metadata] is not permitted, please reformat your input and try again.'} + # TODO: Open in the future when the interface is properly supported + # if user: + # ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465 + # extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user) + + system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages) + + if system: + extra_model_kwargs['system'] = system + + response = client.messages.create( + model=model, + messages=prompt_message_dicts, + stream=stream, + **model_parameters, + **extra_model_kwargs + ) + + if stream: + return self._handle_claude3_stream_response(model, credentials, response, prompt_messages) + + return self._handle_claude3_response(model, credentials, response, prompt_messages) + + def _handle_claude3_response(self, model: str, credentials: dict, response: Message, + prompt_messages: list[PromptMessage]) -> LLMResult: + """ + Handle llm chat response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :return: full response chunk generator result + """ + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage( + content=response.content[0].text + ) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.input_tokens + completion_tokens = response.usage.output_tokens + else: + # calculate num tokens + prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages) + completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, 
prompt_tokens, completion_tokens) + + # transform response + response = LLMResult( + model=response.model, + prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage + ) + + return response + + def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent], + prompt_messages: list[PromptMessage], ) -> Generator: + """ + Handle llm chat stream response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :return: full response or stream response chunk generator result + """ + + try: + full_assistant_content = '' + return_model = None + input_tokens = 0 + output_tokens = 0 + finish_reason = None + index = 0 + + for chunk in response: + if isinstance(chunk, MessageStartEvent): + return_model = chunk.message.model + input_tokens = chunk.message.usage.input_tokens + elif isinstance(chunk, MessageDeltaEvent): + output_tokens = chunk.usage.output_tokens + finish_reason = chunk.delta.stop_reason + elif isinstance(chunk, MessageStopEvent): + usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens) + yield LLMResultChunk( + model=return_model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=index + 1, + message=AssistantPromptMessage( + content='' + ), + finish_reason=finish_reason, + usage=usage + ) + ) + elif isinstance(chunk, ContentBlockDeltaEvent): + chunk_text = chunk.delta.text if chunk.delta.text else '' + full_assistant_content += chunk_text + assistant_prompt_message = AssistantPromptMessage( + content=chunk_text if chunk_text else '', + ) + index = chunk.index + yield LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=index, + message=assistant_prompt_message, + ) + ) + except Exception as ex: + raise InvokeError(str(ex)) + + def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: 
int, completion_tokens: int) -> LLMUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param prompt_tokens: prompt tokens + :param completion_tokens: completion tokens + :return: usage + """ + # get prompt price info + prompt_price_info = self.get_price( + model=model, + credentials=credentials, + price_type=PriceType.INPUT, + tokens=prompt_tokens, + ) + + # get completion price info + completion_price_info = self.get_price( + model=model, + credentials=credentials, + price_type=PriceType.OUTPUT, + tokens=completion_tokens + ) + + # transform usage + usage = LLMUsage( + prompt_tokens=prompt_tokens, + prompt_unit_price=prompt_price_info.unit_price, + prompt_price_unit=prompt_price_info.unit, + prompt_price=prompt_price_info.total_amount, + completion_tokens=completion_tokens, + completion_unit_price=completion_price_info.unit_price, + completion_price_unit=completion_price_info.unit, + completion_price=completion_price_info.total_amount, + total_tokens=prompt_tokens + completion_tokens, + total_price=prompt_price_info.total_amount + completion_price_info.total_amount, + currency=prompt_price_info.currency, + latency=time.perf_counter() - self.started_at + ) + + return usage + + def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]: + """ + Convert prompt messages to dict list and system + """ + + system = "" + first_loop = True + for message in prompt_messages: + if isinstance(message, SystemPromptMessage): + message.content=message.content.strip() + if first_loop: + system=message.content + first_loop=False + else: + system+="\n" + system+=message.content + + prompt_message_dicts = [] + for message in prompt_messages: + if not isinstance(message, SystemPromptMessage): + prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message)) + + return system, prompt_message_dicts + + def _convert_claude3_prompt_message_to_dict(self, message: 
PromptMessage) -> dict: + """ + Convert PromptMessage to dict + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = { + "type": "text", + "text": message_content.data + } + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + if not message_content.data.startswith("data:"): + # fetch image data from url + try: + image_content = requests.get(message_content.data).content + mime_type, _ = mimetypes.guess_type(message_content.data) + base64_data = base64.b64encode(image_content).decode('utf-8') + except Exception as ex: + raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}") + else: + data_split = message_content.data.split(";base64,") + mime_type = data_split[0].replace("data:", "") + base64_data = data_split[1] + + if mime_type not in ["image/jpeg", "image/png", "image/gif", "image/webp"]: + raise ValueError(f"Unsupported image type {mime_type}, " + f"only support image/jpeg, image/png, image/gif, and image/webp") + + sub_message_dict = { + "type": "image", + "source": { + "type": "base64", + "media_type": mime_type, + "data": base64_data + } + } + sub_messages.append(sub_message_dict) + + message_dict = {"role": "user", "content": sub_messages} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + elif isinstance(message, SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": 
message.content} + else: + raise ValueError(f"Got unknown type {message}") + + return message_dict + def get_num_tokens(self, model: str, credentials: dict, messages: list[PromptMessage] | str, tools: Optional[list[PromptMessageTool]] = None) -> int: """ @@ -101,7 +402,19 @@ class BedrockLargeLanguageModel(LargeLanguageModel): :param credentials: model credentials :return: """ - + + if "anthropic.claude-3" in model: + try: + self._invoke_claude3(model=model, + credentials=credentials, + prompt_messages=[{"role": "user", "content": "ping"}], + model_parameters={}, + stop=None, + stream=False) + + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + try: ping_message = UserPromptMessage(content="ping") self._generate(model=model, diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py index e4388699e3..3589ca77cc 100644 --- a/api/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py @@ -449,7 +449,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject(en_US="The temperature of the model. " "Increasing the temperature will make the model answer " "more creatively. (Default: 0.8)"), - default=0.8, + default=0.1, min=0, max=2 ), @@ -472,7 +472,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject(en_US="Reduces the probability of generating nonsense. " "A higher value (e.g. 100) will give more diverse answers, " "while a lower value (e.g. 10) will be more conservative. (Default: 40)"), - default=40, min=1, max=100 ), @@ -483,7 +482,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject(en_US="Sets how strongly to penalize repetitions. " "A higher value (e.g., 1.5) will penalize repetitions more strongly, " "while a lower value (e.g., 0.9) will be more lenient. 
(Default: 1.1)"), - default=1.1, min=-2, max=2 ), @@ -494,7 +492,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel): type=ParameterType.INT, help=I18nObject(en_US="Maximum number of tokens to predict when generating text. " "(Default: 128, -1 = infinite generation, -2 = fill context)"), - default=128, + default=512 if int(credentials.get('max_tokens', 4096)) >= 768 else 128, min=-2, max=int(credentials.get('max_tokens', 4096)), ), @@ -504,7 +502,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): type=ParameterType.INT, help=I18nObject(en_US="Enable Mirostat sampling for controlling perplexity. " "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"), - default=0, min=0, max=2 ), @@ -516,7 +513,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): "the generated text. A lower learning rate will result in slower adjustments, " "while a higher learning rate will make the algorithm more responsive. " "(Default: 0.1)"), - default=0.1, precision=1 ), ParameterRule( @@ -525,7 +521,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): type=ParameterType.FLOAT, help=I18nObject(en_US="Controls the balance between coherence and diversity of the output. " "A lower value will result in more focused and coherent text. (Default: 5.0)"), - default=5.0, precision=1 ), ParameterRule( @@ -543,7 +538,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): type=ParameterType.INT, help=I18nObject(en_US="The number of layers to send to the GPU(s). " "On macOS it defaults to 1 to enable metal support, 0 to disable."), - default=1, min=0, max=1 ), @@ -563,7 +557,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): type=ParameterType.INT, help=I18nObject(en_US="Sets how far back for the model to look back to prevent repetition. 
" "(Default: 64, 0 = disabled, -1 = num_ctx)"), - default=64, min=-1 ), ParameterRule( @@ -573,7 +566,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject(en_US="Tail free sampling is used to reduce the impact of less probable tokens " "from the output. A higher value (e.g., 2.0) will reduce the impact more, " "while a value of 1.0 disables this setting. (default: 1)"), - default=1, precision=1 ), ParameterRule( @@ -583,7 +575,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject(en_US="Sets the random number seed to use for generation. Setting this to " "a specific number will make the model generate the same text for " "the same prompt. (Default: 0)"), - default=0 ), ParameterRule( name='format', diff --git a/api/core/model_runtime/model_providers/openai/llm/llm.py b/api/core/model_runtime/model_providers/openai/llm/llm.py index 2ea65780f1..46f17fe19b 100644 --- a/api/core/model_runtime/model_providers/openai/llm/llm.py +++ b/api/core/model_runtime/model_providers/openai/llm/llm.py @@ -656,6 +656,8 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel): if assistant_message_function_call: # start of stream function call delta_assistant_message_function_call_storage = assistant_message_function_call + if delta_assistant_message_function_call_storage.arguments is None: + delta_assistant_message_function_call_storage.arguments = '' if not has_finish_reason: continue diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 3461863e67..691347e701 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -8,54 +8,70 @@ model_properties: parameter_rules: - name: temperature use_template: temperature - default: 1.0 + type: float + default: 0.85 min: 0.0 max: 2.0 help: zh_Hans: 
用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: top_p use_template: top_p + type: float default: 0.8 min: 0.1 max: 0.9 help: zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. - - name: max_tokens - use_template: max_tokens - default: 1500 - min: 1 - max: 6000 - help: - zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。 - en_US: It is used to limit the number of tokens generated by the model. 
max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated. - name: top_k + type: int + min: 0 + max: 99 label: zh_Hans: 取样数量 en_US: Top k - type: int help: - zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。 - en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect. - required: false + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. - name: seed + required: false + type: int + default: 1234 label: zh_Hans: 随机种子 en_US: Random seed - type: int help: - zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。 - en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. 
- required: false + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. - name: repetition_penalty - label: - en_US: Repetition penalty + required: false type: float default: 1.1 + label: + en_US: Repetition penalty help: zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 - en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment. - required: false + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
- name: response_format use_template: response_format +pricing: + input: '0.12' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index 9089c5904a..91129d37dd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -4,58 +4,74 @@ label: model_type: llm model_properties: mode: chat - context_size: 30000 + context_size: 32768 parameter_rules: - name: temperature use_template: temperature - default: 1.0 + type: float + default: 0.85 min: 0.0 max: 2.0 help: zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
- name: top_p use_template: top_p + type: float default: 0.8 min: 0.1 max: 0.9 help: zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. - - name: max_tokens - use_template: max_tokens - default: 2000 - min: 1 - max: 28000 - help: - zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。 - en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated. - name: top_k + type: int + min: 0 + max: 99 label: zh_Hans: 取样数量 en_US: Top k - type: int help: - zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。 - en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect. - required: false + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. 
For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. - name: seed + required: false + type: int + default: 1234 label: zh_Hans: 随机种子 en_US: Random seed - type: int help: - zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。 - en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. - required: false + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. - name: repetition_penalty - label: - en_US: Repetition penalty + required: false type: float default: 1.1 + label: + en_US: Repetition penalty help: zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 - en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment. - required: false + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. - name: response_format use_template: response_format +pricing: + input: '0.12' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index eb1e8ac09b..5d6b69f21f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -8,54 +8,70 @@ model_properties: parameter_rules: - name: temperature use_template: temperature - default: 1.0 + type: float + default: 0.85 min: 0.0 max: 2.0 help: zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: top_p use_template: top_p + type: float default: 0.8 min: 0.1 max: 0.9 help: zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. - - name: max_tokens - use_template: max_tokens - default: 1500 - min: 1 - max: 6000 - help: - zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。 - en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated. - name: top_k + type: int + min: 0 + max: 99 label: zh_Hans: 取样数量 en_US: Top k - type: int help: - zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。 - en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. 
The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect. - required: false + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. - name: seed + required: false + type: int + default: 1234 label: zh_Hans: 随机种子 en_US: Random seed - type: int help: - zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。 - en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. - required: false + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. - name: repetition_penalty - label: - en_US: Repetition penalty + required: false type: float default: 1.1 + label: + en_US: Repetition penalty help: zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 - en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment. 
- required: false + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. - name: response_format use_template: response_format +pricing: + input: '0.12' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index 83640371f9..7c25e8802b 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -4,58 +4,70 @@ label: model_type: llm model_properties: mode: completion - context_size: 32000 + context_size: 32768 parameter_rules: - name: temperature use_template: temperature - default: 1.0 + type: float + default: 0.85 min: 0.0 max: 2.0 help: zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 1500 + min: 1 + max: 1500 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: top_p use_template: top_p + type: float default: 0.8 min: 0.1 max: 0.9 help: zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. - - name: max_tokens - use_template: max_tokens - default: 2000 - min: 1 - max: 30000 - help: - zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。 - en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated. 
- name: top_k + type: int + min: 0 + max: 99 label: zh_Hans: 取样数量 en_US: Top k - type: int help: - zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。 - en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect. - required: false + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. - name: seed + required: false + type: int + default: 1234 label: zh_Hans: 随机种子 en_US: Random seed - type: int help: - zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。 - en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. - required: false + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. 
Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. - name: repetition_penalty - label: - en_US: Repetition penalty + required: false type: float default: 1.1 + label: + en_US: Repetition penalty help: zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 - en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment. + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
- name: response_format use_template: response_format pricing: diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index 5455555bbd..20b46de6f3 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -8,55 +8,66 @@ model_properties: parameter_rules: - name: temperature use_template: temperature - default: 1.0 + type: float + default: 0.85 min: 0.0 max: 2.0 help: zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 1500 + min: 1 + max: 1500 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: top_p use_template: top_p + type: float default: 0.8 min: 0.1 max: 0.9 help: zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 en_US: The probability threshold of the kernel sampling method during the generation process. 
For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. - - name: max_tokens - use_template: max_tokens - default: 1500 - min: 1 - max: 6000 - help: - zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。 - en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated. - name: top_k + type: int + min: 0 + max: 99 label: zh_Hans: 取样数量 en_US: Top k - type: int help: - zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。 - en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect. - required: false + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
- name: seed + required: false + type: int + default: 1234 label: zh_Hans: 随机种子 en_US: Random seed - type: int help: - zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。 - en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. - required: false + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. - name: repetition_penalty - label: - en_US: Repetition penalty + required: false type: float default: 1.1 + label: + en_US: Repetition penalty help: zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 - en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment. - required: false + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. 
When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. - name: response_format use_template: response_format pricing: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/__init__.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml new file mode 100644 index 0000000000..eed09f95de --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml @@ -0,0 +1,4 @@ +model: text-embedding-v1 +model_type: text-embedding +model_properties: + context_size: 2048 diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml new file mode 100644 index 0000000000..db2fa861e6 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml @@ -0,0 +1,4 @@ +model: text-embedding-v2 +model_type: text-embedding +model_properties: + context_size: 2048
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..a5f3660fb2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
@@ -0,0 +1,140 @@
+import time
+from typing import Optional
+
+import dashscope
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import (
+    EmbeddingUsage,
+    TextEmbeddingResult,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import (
+    TextEmbeddingModel,
+)
+from core.model_runtime.model_providers.tongyi._common import _CommonTongyi
+
+
+class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
+    """
+    Model class for Tongyi text embedding model.
+    """
+
+    def _invoke(
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+    ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :return: embeddings result
+        """
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        # NOTE(review): assigns the key on the dashscope module, i.e. process-wide;
+        # confirm this is safe when requests with different credentials overlap.
+        dashscope.api_key = credentials_kwargs["dashscope_api_key"]
+        embeddings, embedding_used_tokens = self.embed_documents(model, texts)
+
+        return TextEmbeddingResult(
+            embeddings=embeddings,
+            usage=self._calc_response_usage(model, credentials_kwargs, embedding_used_tokens),
+            model=model
+        )
+
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        """
+        Get number of tokens for given texts
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return: sum of the per-text token counts, 0 for an empty list
+        """
+        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials by embedding a short probe text
+
+        :param model: model name
+        :param credentials: model credentials
+        :raises CredentialsValidateFailedError: if the probe fails for any reason
+        """
+        try:
+            # transform credentials to kwargs for model instance
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            dashscope.api_key = credentials_kwargs["dashscope_api_key"]
+            # call embedding model
+            self.embed_documents(model=model, texts=["ping"])
+        except Exception as ex:
+            # chain the cause so the original traceback is not lost
+            raise CredentialsValidateFailedError(str(ex)) from ex
+
+    @staticmethod
+    def embed_documents(model: str, texts: list[str]) -> tuple[list[list[float]], int]:
+        """Call out to Tongyi's embedding endpoint.
+
+        Args:
+            model: The embedding model name.
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text, and tokens usage.
+
+        Raises:
+            ValueError: If a response carries no embeddings or no token usage.
+        """
+        embeddings = []
+        embedding_used_tokens = 0
+        for text in texts:
+            response = dashscope.TextEmbedding.call(model=model, input=text, text_type="document")
+            # fail with a readable message instead of a TypeError/KeyError on the lookups below
+            output = response.output
+            if not output or "embeddings" not in output or not output["embeddings"]:
+                raise ValueError("Tongyi embedding response does not contain embeddings")
+            usage = response.usage
+            if not usage or "total_tokens" not in usage:
+                raise ValueError("Tongyi embedding response does not contain token usage")
+            embeddings.append(output["embeddings"][0]["embedding"])
+            embedding_used_tokens += usage["total_tokens"]
+
+        return [list(map(float, e)) for e in embeddings], embedding_used_tokens
+
+    def _calc_response_usage(
+        self, model: str, credentials: dict, tokens: int
+    ) -> EmbeddingUsage:
+        """
+        Calculate response usage
+
+        :param model: model name
+        :param credentials: model credentials
+        :param tokens: input tokens
+        :return: usage
+        """
+        # get input price info
+        input_price_info = self.get_price(
+            model=model,
+            credentials=credentials,
+            price_type=PriceType.INPUT,
+            tokens=tokens
+        )
+
+        # transform usage
+        usage = EmbeddingUsage(
+            tokens=tokens,
+            total_tokens=tokens,
+            unit_price=input_price_info.unit_price,
+            price_unit=input_price_info.unit,
+            total_price=input_price_info.total_amount,
+            currency=input_price_info.currency,
+            latency=time.perf_counter() - self.started_at
+        )
+
+        return usage
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index 500fd6e045..b251391e34 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -17,15 +17,16 @@ help: supported_model_types: - llm - tts + - text-embedding configurate_methods: - predefined-model provider_credential_schema: credential_form_schemas: - variable: dashscope_api_key label: - en_US: APIKey + en_US: API Key type: secret-input required: true placeholder: - zh_Hans: 在此输入您的 APIKey - en_US:
Enter your APIKey + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/yi/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/yi/_assets/icon_l_en.svg index 0efce4e85b..9ce3baddaa 100644 --- a/api/core/model_runtime/model_providers/yi/_assets/icon_l_en.svg +++ b/api/core/model_runtime/model_providers/yi/_assets/icon_l_en.svg @@ -1,20 +1,12 @@ - - - - - - - - - - - - - - - - - - 01.AI - - + + + + + + + + + + + + \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/yi/_assets/icon_l_zh.svg b/api/core/model_runtime/model_providers/yi/_assets/icon_l_zh.svg deleted file mode 100644 index 951842da55..0000000000 --- a/api/core/model_runtime/model_providers/yi/_assets/icon_l_zh.svg +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - 零一万物 - - diff --git a/api/core/model_runtime/model_providers/yi/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/yi/_assets/icon_s_en.svg index a813274466..eb0395a21c 100644 --- a/api/core/model_runtime/model_providers/yi/_assets/icon_s_en.svg +++ b/api/core/model_runtime/model_providers/yi/_assets/icon_s_en.svg @@ -1,7 +1,8 @@ - - - - - - - \ No newline at end of file + + + + + + + + \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/yi/yi.yaml b/api/core/model_runtime/model_providers/yi/yi.yaml index 368c715456..a8c0d857b6 100644 --- a/api/core/model_runtime/model_providers/yi/yi.yaml +++ b/api/core/model_runtime/model_providers/yi/yi.yaml @@ -9,7 +9,7 @@ icon_small: en_US: icon_s_en.svg icon_large: en_US: icon_l_en.svg -background: "#EFFDFD" +background: "#E9F1EC" help: title: en_US: Get your API Key from 01.ai diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml index ca7b1c1f45..6b5bcc5bcf 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml +++ 
b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml @@ -32,3 +32,8 @@ parameter_rules: zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. required: false + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml index a768902a77..ddea331c8e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml @@ -30,3 +30,8 @@ parameter_rules: zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. 
required: false + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index f7a61b0b0c..437f871864 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -171,6 +171,7 @@ class ToolProviderCredentials(BaseModel): SECRET_INPUT = "secret-input" TEXT_INPUT = "text-input" SELECT = "select" + BOOLEAN = "boolean" @classmethod def value_of(cls, value: str) -> "ToolProviderCredentials.CredentialsType": @@ -192,7 +193,7 @@ class ToolProviderCredentials(BaseModel): name: str = Field(..., description="The name of the credentials") type: CredentialsType = Field(..., description="The type of the credentials") required: bool = False - default: Optional[str] = None + default: Optional[Union[int, str]] = None options: Optional[list[ToolCredentialsOption]] = None label: Optional[I18nObject] = None help: Optional[I18nObject] = None diff --git a/api/core/tools/provider/builtin/bing/bing.py b/api/core/tools/provider/builtin/bing/bing.py index ff131b26cd..6e62abfc10 100644 --- a/api/core/tools/provider/builtin/bing/bing.py +++ b/api/core/tools/provider/builtin/bing/bing.py @@ -12,12 +12,11 @@ class BingProvider(BuiltinToolProviderController): meta={ "credentials": credentials, } - ).invoke( - user_id='', + ).validate_credentials( + credentials=credentials, tool_parameters={ "query": "test", "result_type": "link", - "enable_webpages": True, }, ) except Exception as e: diff --git a/api/core/tools/provider/builtin/bing/bing.yaml b/api/core/tools/provider/builtin/bing/bing.yaml index 9df836929c..35cd729208 100644 --- a/api/core/tools/provider/builtin/bing/bing.yaml +++ b/api/core/tools/provider/builtin/bing/bing.yaml @@ -43,3 +43,63 @@ credentials_for_provider: zh_Hans: 例如 "https://api.bing.microsoft.com/v7.0/search" pt_BR: An endpoint is like "https://api.bing.microsoft.com/v7.0/search" default: 
https://api.bing.microsoft.com/v7.0/search + allow_entities: + type: boolean + required: false + label: + en_US: Allow Entities Search + zh_Hans: 支持实体搜索 + pt_BR: Allow Entities Search + help: + en_US: Does your subscription plan allow entity search + zh_Hans: 您的订阅计划是否支持实体搜索 + pt_BR: Does your subscription plan allow entity search + default: true + allow_web_pages: + type: boolean + required: false + label: + en_US: Allow Web Pages Search + zh_Hans: 支持网页搜索 + pt_BR: Allow Web Pages Search + help: + en_US: Does your subscription plan allow web pages search + zh_Hans: 您的订阅计划是否支持网页搜索 + pt_BR: Does your subscription plan allow web pages search + default: true + allow_computation: + type: boolean + required: false + label: + en_US: Allow Computation Search + zh_Hans: 支持计算搜索 + pt_BR: Allow Computation Search + help: + en_US: Does your subscription plan allow computation search + zh_Hans: 您的订阅计划是否支持计算搜索 + pt_BR: Does your subscription plan allow computation search + default: false + allow_news: + type: boolean + required: false + label: + en_US: Allow News Search + zh_Hans: 支持新闻搜索 + pt_BR: Allow News Search + help: + en_US: Does your subscription plan allow news search + zh_Hans: 您的订阅计划是否支持新闻搜索 + pt_BR: Does your subscription plan allow news search + default: false + allow_related_searches: + type: boolean + required: false + label: + en_US: Allow Related Searches + zh_Hans: 支持相关搜索 + pt_BR: Allow Related Searches + help: + en_US: Does your subscription plan allow related searches + zh_Hans: 您的订阅计划是否支持相关搜索 + pt_BR: Does your subscription plan allow related searches + default: false diff --git a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py index 7b740293dd..8f11d2173c 100644 --- a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py +++ b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py @@ -10,53 +10,23 @@ from core.tools.tool.builtin_tool import BuiltinTool class 
BingSearchTool(BuiltinTool): url = 'https://api.bing.microsoft.com/v7.0/search' - def _invoke(self, - user_id: str, - tool_parameters: dict[str, Any], - ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + def _invoke_bing(self, + user_id: str, + subscription_key: str, query: str, limit: int, + result_type: str, market: str, lang: str, + filters: list[str]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: """ - invoke tools + invoke bing search """ - - key = self.runtime.credentials.get('subscription_key', None) - if not key: - raise Exception('subscription_key is required') - - server_url = self.runtime.credentials.get('server_url', None) - if not server_url: - server_url = self.url - - query = tool_parameters.get('query', None) - if not query: - raise Exception('query is required') - - limit = min(tool_parameters.get('limit', 5), 10) - result_type = tool_parameters.get('result_type', 'text') or 'text' - - market = tool_parameters.get('market', 'US') - lang = tool_parameters.get('language', 'en') - filter = [] - - if tool_parameters.get('enable_computation', False): - filter.append('Computation') - if tool_parameters.get('enable_entities', False): - filter.append('Entities') - if tool_parameters.get('enable_news', False): - filter.append('News') - if tool_parameters.get('enable_related_search', False): - filter.append('RelatedSearches') - if tool_parameters.get('enable_webpages', False): - filter.append('WebPages') - market_code = f'{lang}-{market}' accept_language = f'{lang},{market_code};q=0.9' headers = { - 'Ocp-Apim-Subscription-Key': key, + 'Ocp-Apim-Subscription-Key': subscription_key, 'Accept-Language': accept_language } query = quote(query) - server_url = f'{server_url}?q={query}&mkt={market_code}&count={limit}&responseFilter={",".join(filter)}' + server_url = f'{self.url}?q={query}&mkt={market_code}&count={limit}&responseFilter={",".join(filters)}' response = get(server_url, headers=headers) if response.status_code != 200: @@ -124,3 +94,105 @@ 
class BingSearchTool(BuiltinTool): text += f'{related["displayText"]} - {related["webSearchUrl"]}\n' return self.create_text_message(text=self.summary(user_id=user_id, content=text)) + + + def validate_credentials(self, credentials: dict[str, Any], tool_parameters: dict[str, Any]) -> None: + key = credentials.get('subscription_key', None) + if not key: + raise Exception('subscription_key is required') + + server_url = credentials.get('server_url', None) + if not server_url: + server_url = self.url + + query = tool_parameters.get('query', None) + if not query: + raise Exception('query is required') + + limit = min(tool_parameters.get('limit', 5), 10) + result_type = tool_parameters.get('result_type', 'text') or 'text' + + market = tool_parameters.get('market', 'US') + lang = tool_parameters.get('language', 'en') + filter = [] + + if credentials.get('allow_entities', False): + filter.append('Entities') + + if credentials.get('allow_computation', False): + filter.append('Computation') + + if credentials.get('allow_news', False): + filter.append('News') + + if credentials.get('allow_related_searches', False): + filter.append('RelatedSearches') + + if credentials.get('allow_web_pages', False): + filter.append('WebPages') + + if not filter: + raise Exception('At least one filter is required') + + self._invoke_bing( + user_id='test', + subscription_key=key, + query=query, + limit=limit, + result_type=result_type, + market=market, + lang=lang, + filters=filter + ) + + def _invoke(self, + user_id: str, + tool_parameters: dict[str, Any], + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + """ + invoke tools + """ + + key = self.runtime.credentials.get('subscription_key', None) + if not key: + raise Exception('subscription_key is required') + + server_url = self.runtime.credentials.get('server_url', None) + if not server_url: + server_url = self.url + + query = tool_parameters.get('query', None) + if not query: + raise Exception('query is required') + + limit = 
min(tool_parameters.get('limit', 5), 10) + result_type = tool_parameters.get('result_type', 'text') or 'text' + + market = tool_parameters.get('market', 'US') + lang = tool_parameters.get('language', 'en') + filter = [] + + if tool_parameters.get('enable_computation', False): + filter.append('Computation') + if tool_parameters.get('enable_entities', False): + filter.append('Entities') + if tool_parameters.get('enable_news', False): + filter.append('News') + if tool_parameters.get('enable_related_search', False): + filter.append('RelatedSearches') + if tool_parameters.get('enable_webpages', False): + filter.append('WebPages') + + if not filter: + raise Exception('At least one filter is required') + + return self._invoke_bing( + user_id=user_id, + subscription_key=key, + query=query, + limit=limit, + result_type=result_type, + market=market, + lang=lang, + filters=filter + ) \ No newline at end of file diff --git a/api/core/tools/provider/builtin/openweather/_assets/icon.svg b/api/core/tools/provider/builtin/openweather/_assets/icon.svg new file mode 100644 index 0000000000..f06cd87e64 --- /dev/null +++ b/api/core/tools/provider/builtin/openweather/_assets/icon.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/api/core/tools/provider/builtin/openweather/openweather.py b/api/core/tools/provider/builtin/openweather/openweather.py new file mode 100644 index 0000000000..a2827177a3 --- /dev/null +++ b/api/core/tools/provider/builtin/openweather/openweather.py @@ -0,0 +1,36 @@ +import requests + +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +def query_weather(city="Beijing", units="metric", language="zh_cn", api_key=None): + + url = "https://api.openweathermap.org/data/2.5/weather" + params = {"q": city, "appid": api_key, "units": units, "lang": language} + + return requests.get(url, params=params) + + +class 
def query_weather(city="Beijing", units="metric", language="zh_cn", api_key=None):
    """
    Request the current weather for a city from the OpenWeather API.

    :param city: city name sent as ``q``
    :param units: unit system sent as ``units``
    :param language: response language sent as ``lang``
    :param api_key: OpenWeather API key sent as ``appid``
    :return: the ``requests`` response object, unchecked
    """
    url = "https://api.openweathermap.org/data/2.5/weather"
    params = {"q": city, "appid": api_key, "units": units, "lang": language}

    return requests.get(url, params=params)


class OpenweatherProvider(BuiltinToolProviderController):
    """Provider controller for the OpenWeather built-in tools."""

    def _validate_credentials(self, credentials: dict) -> None:
        """
        Validate the OpenWeather API key by issuing one weather query with it.

        :param credentials: provider credentials; ``api_key`` is mandatory
        :raises ToolProviderCredentialValidationError: when the key is missing, the
            request fails or the API answers with a non-200 status
        """
        try:
            if "api_key" not in credentials or not credentials.get("api_key"):
                raise ToolProviderCredentialValidationError(
                    "Open weather API key is required."
                )
            apikey = credentials.get("api_key")
            try:
                response = query_weather(api_key=apikey)
                if response.status_code == 200:
                    pass
                else:
                    # OpenWeather reports the failure reason under "message";
                    # fall back to the raw body if that field is absent.
                    raise ToolProviderCredentialValidationError(
                        (response.json()).get("message") or response.text
                    )
            except Exception as e:
                raise ToolProviderCredentialValidationError(
                    "Open weather API Key is invalid. {}".format(e)
                )
        except Exception as e:
            raise ToolProviderCredentialValidationError(str(e))
class OpenweatherTool(BuiltinTool):
    """Built-in tool that queries the OpenWeather current-weather endpoint for a city."""

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        invoke tools

        :param user_id: id of the calling user, forwarded to ``self.summary``
        :param tool_parameters: ``city`` (mandatory), ``units`` and ``lang``
        :return: a text message holding the summarised weather JSON, or a text
            message describing why the lookup could not be performed
        """
        city = tool_parameters.get("city", "")
        if not city:
            return self.create_text_message("Please tell me your city")

        api_key = self.runtime.credentials.get("api_key")
        if not api_key:
            return self.create_text_message("OpenWeather API key is required.")

        units = tool_parameters.get("units", "metric")
        lang = tool_parameters.get("lang", "zh_cn")
        try:
            # request URL
            url = "https://api.openweathermap.org/data/2.5/weather"

            # request params
            params = {
                "q": city,
                "appid": api_key,
                "units": units,
                "lang": lang,
            }
            response = requests.get(url, params=params)

            if response.status_code == 200:
                data = response.json()
                return self.create_text_message(
                    self.summary(
                        user_id=user_id, content=json.dumps(data, ensure_ascii=False)
                    )
                )

            error_message = {
                "error": f"failed:{response.status_code}",
                "data": response.text,
            }
            # Wrap the error in a message object: the declared return type is
            # ToolInvokeMessage, not a bare JSON string.
            return self.create_text_message(json.dumps(error_message))

        except Exception as e:
            return self.create_text_message(
                "Openweather API Key is invalid. {}".format(e)
            )
You have to extract + the Chinese city name from the question.If the input region is in Chinese + characters for China, it should be replaced with the corresponding English + name, such as '北京' for correct input is 'Beijing' + form: llm + - name: lang + type: select + required: true + human_description: + en_US: language + zh_Hans: 语言 + pt_BR: language + label: + en_US: language + zh_Hans: 语言 + pt_BR: language + form: form + options: + - value: zh_cn + label: + en_US: cn + zh_Hans: 中国 + pt_BR: cn + - value: en_us + label: + en_US: usa + zh_Hans: 美国 + pt_BR: usa + default: zh_cn + - name: units + type: select + required: true + human_description: + en_US: units for temperature + zh_Hans: 温度单位 + pt_BR: units for temperature + label: + en_US: units + zh_Hans: 单位 + pt_BR: units + form: form + options: + - value: metric + label: + en_US: metric + zh_Hans: ℃ + pt_BR: metric + - value: imperial + label: + en_US: imperial + zh_Hans: ℉ + pt_BR: imperial + default: metric diff --git a/api/core/tools/provider/builtin/spark/__init__.py b/api/core/tools/provider/builtin/spark/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/tools/provider/builtin/spark/_assets/icon.svg b/api/core/tools/provider/builtin/spark/_assets/icon.svg new file mode 100644 index 0000000000..ef0a9131a4 --- /dev/null +++ b/api/core/tools/provider/builtin/spark/_assets/icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/api/core/tools/provider/builtin/spark/spark.py b/api/core/tools/provider/builtin/spark/spark.py new file mode 100644 index 0000000000..cb8e69a59f --- /dev/null +++ b/api/core/tools/provider/builtin/spark/spark.py @@ -0,0 +1,40 @@ +import json + +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.provider.builtin.spark.tools.spark_img_generation import spark_response +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +class SparkProvider(BuiltinToolProviderController): + def 
_validate_credentials(self, credentials: dict) -> None: + try: + if "APPID" not in credentials or not credentials.get("APPID"): + raise ToolProviderCredentialValidationError("APPID is required.") + if "APISecret" not in credentials or not credentials.get("APISecret"): + raise ToolProviderCredentialValidationError("APISecret is required.") + if "APIKey" not in credentials or not credentials.get("APIKey"): + raise ToolProviderCredentialValidationError("APIKey is required.") + + appid = credentials.get("APPID") + apisecret = credentials.get("APISecret") + apikey = credentials.get("APIKey") + prompt = "a cute black dog" + + try: + response = spark_response(prompt, appid, apikey, apisecret) + data = json.loads(response) + code = data["header"]["code"] + + if code == 0: + # 0 success, + pass + else: + raise ToolProviderCredentialValidationError( + "image generate error, code:{}".format(code) + ) + except Exception as e: + raise ToolProviderCredentialValidationError( + "APPID APISecret APIKey is invalid. 
{}".format(e) + ) + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) diff --git a/api/core/tools/provider/builtin/spark/spark.yaml b/api/core/tools/provider/builtin/spark/spark.yaml new file mode 100644 index 0000000000..f2b9c89e96 --- /dev/null +++ b/api/core/tools/provider/builtin/spark/spark.yaml @@ -0,0 +1,59 @@ +identity: + author: Onelevenvy + name: spark + label: + en_US: Spark + zh_Hans: 讯飞星火 + pt_BR: Spark + description: + en_US: Spark Platform Toolkit + zh_Hans: 讯飞星火平台工具 + pt_BR: Pacote de Ferramentas da Plataforma Spark + icon: icon.svg +credentials_for_provider: + APPID: + type: secret-input + required: true + label: + en_US: Spark APPID + zh_Hans: APPID + pt_BR: Spark APPID + help: + en_US: Please input your APPID + zh_Hans: 请输入你的 APPID + pt_BR: Please input your APPID + placeholder: + en_US: Please input your APPID + zh_Hans: 请输入你的 APPID + pt_BR: Please input your APPID + APISecret: + type: secret-input + required: true + label: + en_US: Spark APISecret + zh_Hans: APISecret + pt_BR: Spark APISecret + help: + en_US: Please input your Spark APISecret + zh_Hans: 请输入你的 APISecret + pt_BR: Please input your Spark APISecret + placeholder: + en_US: Please input your Spark APISecret + zh_Hans: 请输入你的 APISecret + pt_BR: Please input your Spark APISecret + APIKey: + type: secret-input + required: true + label: + en_US: Spark APIKey + zh_Hans: APIKey + pt_BR: Spark APIKey + help: + en_US: Please input your Spark APIKey + zh_Hans: 请输入你的 APIKey + pt_BR: Please input your Spark APIKey + placeholder: + en_US: Please input your Spark APIKey + zh_Hans: 请输入你的 APIKey + pt_BR: Please input Spark APIKey + url: https://console.xfyun.cn/services diff --git a/api/core/tools/provider/builtin/spark/tools/spark_img_generation.py b/api/core/tools/provider/builtin/spark/tools/spark_img_generation.py new file mode 100644 index 0000000000..a977af2b76 --- /dev/null +++ b/api/core/tools/provider/builtin/spark/tools/spark_img_generation.py @@ -0,0 +1,154 @@ 
class AssembleHeaderException(Exception):
    """Raised when a request URL cannot be split into schema, host and path."""

    def __init__(self, msg):
        super().__init__(msg)
        self.message = msg


class Url:
    """Holder for the three parts of a parsed request URL."""

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema


# calculate sha256 and encode to base64
def sha256base64(data):
    """Return the base64 text of the SHA-256 digest of ``data`` (bytes)."""
    sha256 = hashlib.sha256()
    sha256.update(data)
    digest = base64.b64encode(sha256.digest()).decode(encoding="utf-8")
    return digest


def parse_url(requset_url):
    """
    Split a URL of the form ``schema://host/path`` into its parts.

    :param requset_url: the URL to split
    :return: ``Url`` whose ``schema`` keeps the ``://`` and whose ``path`` keeps the leading ``/``
    :raises AssembleHeaderException: when ``://`` is missing, the host is empty or
        there is no path
    """
    # find() instead of index(): index() raises ValueError on a miss, which made
    # the invalid-URL check below unreachable for URLs without scheme or path.
    stidx = requset_url.find("://")
    if stidx < 0:
        raise AssembleHeaderException("invalid request url:" + requset_url)
    host = requset_url[stidx + 3:]
    schema = requset_url[: stidx + 3]
    edidx = host.find("/")
    if edidx <= 0:
        raise AssembleHeaderException("invalid request url:" + requset_url)
    path = host[edidx:]
    host = host[:edidx]
    return Url(host, path, schema)


def assemble_ws_auth_url(requset_url, method="GET", api_key="", api_secret=""):
    """
    Append HMAC-SHA256 authentication query parameters to a request URL.

    The signed text is ``host``, the current date and the request line; the
    resulting ``host``, ``date`` and ``authorization`` values are url-encoded
    and appended after ``?``.

    :param requset_url: URL to sign (``schema://host/path``)
    :param method: HTTP method used in the signed request line
    :param api_key: key embedded in the authorization value
    :param api_secret: secret used as the HMAC key
    :return: ``requset_url`` followed by the authentication query string
    """
    u = parse_url(requset_url)
    host = u.host
    path = u.path
    now = datetime.now()
    # local time -> epoch seconds -> HTTP date string
    date = format_date_time(mktime(now.timetuple()))
    signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
        host, date, method, path
    )
    signature_sha = hmac.new(
        api_secret.encode("utf-8"),
        signature_origin.encode("utf-8"),
        digestmod=hashlib.sha256,
    ).digest()
    signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
    authorization_origin = f'api_key="{api_key}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha}"'

    authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
        encoding="utf-8"
    )
    values = {"host": host, "date": date, "authorization": authorization}

    return requset_url + "?" + urlencode(values)
def get_body(appid, text):
    """
    Build the JSON payload for one image-generation request.

    :param appid: Spark application id placed in the request header
    :param text: user prompt sent as the single user message
    :return: the request body as a dict
    """
    body = {
        "header": {"app_id": appid, "uid": "123456789"},
        "parameter": {
            "chat": {"domain": "general", "temperature": 0.5, "max_tokens": 4096}
        },
        "payload": {"message": {"text": [{"role": "user", "content": text}]}},
    }
    return body


def spark_response(text, appid, apikey, apisecret):
    """
    Send a prompt to the Spark image-generation endpoint.

    :param text: user prompt
    :param appid: Spark application id
    :param apikey: Spark API key used to sign the URL
    :param apisecret: Spark API secret used to sign the URL
    :return: the raw response body as text
    """
    host = "http://spark-api.cn-huabei-1.xf-yun.com/v2.1/tti"
    url = assemble_ws_auth_url(
        host, method="POST", api_key=apikey, api_secret=apisecret
    )
    content = get_body(appid, text)
    response = requests.post(
        url, json=content, headers={"content-type": "application/json"}
    ).text
    return response


class SparkImgGeneratorTool(BuiltinTool):
    """Built-in tool that generates an image from a text prompt via Spark."""

    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        invoke tools

        :param user_id: id of the calling user (not used here)
        :param tool_parameters: ``prompt`` is mandatory
        :return: a list of blob messages (one per generated image), or a single
            text message when a credential or the prompt is missing or the API
            reports an error
        """
        for field in ("APPID", "APISecret", "APIKey"):
            if not self.runtime.credentials.get(field):
                return self.create_text_message(f"{field} is required.")

        prompt = tool_parameters.get("prompt", "")
        if not prompt:
            return self.create_text_message("Please input prompt")

        res = self.img_generation(prompt)
        if not isinstance(res, list):
            # img_generation returns a text message instead of a list when the API
            # reports an error; hand it back rather than iterating over it.
            return res

        result = []
        for image in res:
            result.append(
                self.create_blob_message(
                    blob=b64decode(image["base64_image"]),
                    meta={"mime_type": "image/png"},
                    save_as=self.VARIABLE_KEY.IMAGE.value,
                )
            )
        return result

    def img_generation(self, prompt):
        """
        Call Spark with the runtime credentials and extract the generated image.

        :param prompt: user prompt
        :return: ``[{"base64_image": ...}]`` when the response header code is 0,
            otherwise a text message describing the error
        """
        response = spark_response(
            text=prompt,
            appid=self.runtime.credentials.get("APPID"),
            apikey=self.runtime.credentials.get("APIKey"),
            apisecret=self.runtime.credentials.get("APISecret"),
        )
        data = json.loads(response)
        code = data["header"]["code"]
        if code != 0:
            return self.create_text_message(f"error: {code}, {data}")
        else:
            text = data["payload"]["choices"]["text"]
            # only the first entry of the returned text list is used
            image_content = text[0]
            image_base = image_content["content"]
            json_data = {"base64_image": image_base}
            return [json_data]
options]: raise ToolProviderCredentialValidationError(f'credential {credential_schema.label.en_US} should be one of {options}') - - if credentials[credential_name]: + elif credential_schema.type == ToolProviderCredentials.CredentialsType.BOOLEAN: + if isinstance(credentials[credential_name], bool): + pass + elif isinstance(credentials[credential_name], str): + if credentials[credential_name].lower() == 'true': + credentials[credential_name] = True + elif credentials[credential_name].lower() == 'false': + credentials[credential_name] = False + else: + raise ToolProviderCredentialValidationError(f'credential {credential_schema.label.en_US} should be boolean') + elif isinstance(credentials[credential_name], int): + if credentials[credential_name] == 1: + credentials[credential_name] = True + elif credentials[credential_name] == 0: + credentials[credential_name] = False + else: + raise ToolProviderCredentialValidationError(f'credential {credential_schema.label.en_US} should be boolean') + else: + raise ToolProviderCredentialValidationError(f'credential {credential_schema.label.en_US} should be boolean') + + if credentials[credential_name] or credentials[credential_name] == False: credentials_need_to_validate.pop(credential_name) for credential_name in credentials_need_to_validate: diff --git a/api/core/tools/tool/api_tool.py b/api/core/tools/tool/api_tool.py index fa7e7567dd..54e2f41019 100644 --- a/api/core/tools/tool/api_tool.py +++ b/api/core/tools/tool/api_tool.py @@ -9,7 +9,7 @@ import requests import core.helper.ssrf_proxy as ssrf_proxy from core.tools.entities.tool_bundle import ApiBasedToolBundle from core.tools.entities.tool_entities import ToolInvokeMessage -from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.errors import ToolInvokeError, ToolParameterValidationError, ToolProviderCredentialValidationError from core.tools.tool.tool import Tool API_TOOL_DEFAULT_TIMEOUT = (10, 60) @@ -81,7 +81,7 @@ class ApiTool(Tool): 
needed_parameters = [parameter for parameter in self.api_bundle.parameters if parameter.required] for parameter in needed_parameters: if parameter.required and parameter.name not in parameters: - raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter.name}") + raise ToolParameterValidationError(f"Missing required parameter {parameter.name}") if parameter.default is not None and parameter.name not in parameters: parameters[parameter.name] = parameter.default @@ -94,7 +94,7 @@ class ApiTool(Tool): """ if isinstance(response, httpx.Response): if response.status_code >= 400: - raise ToolProviderCredentialValidationError(f"Request failed with status code {response.status_code}") + raise ToolInvokeError(f"Request failed with status code {response.status_code} and {response.text}") if not response.content: return 'Empty response from the tool, please check your parameters and try again.' try: @@ -107,7 +107,7 @@ class ApiTool(Tool): return response.text elif isinstance(response, requests.Response): if not response.ok: - raise ToolProviderCredentialValidationError(f"Request failed with status code {response.status_code}") + raise ToolInvokeError(f"Request failed with status code {response.status_code} and {response.text}") if not response.content: return 'Empty response from the tool, please check your parameters and try again.' 
try: @@ -139,7 +139,7 @@ class ApiTool(Tool): if parameter['name'] in parameters: value = parameters[parameter['name']] elif parameter['required']: - raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}") + raise ToolParameterValidationError(f"Missing required parameter {parameter['name']}") else: value = (parameter.get('schema', {}) or {}).get('default', '') path_params[parameter['name']] = value @@ -149,7 +149,7 @@ class ApiTool(Tool): if parameter['name'] in parameters: value = parameters[parameter['name']] elif parameter['required']: - raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}") + raise ToolParameterValidationError(f"Missing required parameter {parameter['name']}") else: value = (parameter.get('schema', {}) or {}).get('default', '') params[parameter['name']] = value @@ -159,7 +159,7 @@ class ApiTool(Tool): if parameter['name'] in parameters: value = parameters[parameter['name']] elif parameter['required']: - raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}") + raise ToolParameterValidationError(f"Missing required parameter {parameter['name']}") else: value = (parameter.get('schema', {}) or {}).get('default', '') cookies[parameter['name']] = value @@ -169,7 +169,7 @@ class ApiTool(Tool): if parameter['name'] in parameters: value = parameters[parameter['name']] elif parameter['required']: - raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}") + raise ToolParameterValidationError(f"Missing required parameter {parameter['name']}") else: value = (parameter.get('schema', {}) or {}).get('default', '') headers[parameter['name']] = value @@ -188,7 +188,7 @@ class ApiTool(Tool): # convert type body[name] = self._convert_body_property_type(property, parameters[name]) elif name in required: - raise ToolProviderCredentialValidationError( + raise ToolParameterValidationError( f"Missing required 
parameter {name} in operation {self.api_bundle.operation_id}" ) elif 'default' in property: diff --git a/api/migrations/versions/a8f9b3c45e4a_add_tenant_id_db_index.py b/api/migrations/versions/a8f9b3c45e4a_add_tenant_id_db_index.py new file mode 100644 index 0000000000..62d6faeb1d --- /dev/null +++ b/api/migrations/versions/a8f9b3c45e4a_add_tenant_id_db_index.py @@ -0,0 +1,36 @@ +"""add_tenant_id_db_index + +Revision ID: a8f9b3c45e4a +Revises: 16830a790f0f +Create Date: 2024-03-18 05:07:35.588473 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'a8f9b3c45e4a' +down_revision = '16830a790f0f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('document_segments', schema=None) as batch_op: + batch_op.create_index('document_segment_tenant_idx', ['tenant_id'], unique=False) + + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.create_index('document_tenant_idx', ['tenant_id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.drop_index('document_tenant_idx') + + with op.batch_alter_table('document_segments', schema=None) as batch_op: + batch_op.drop_index('document_segment_tenant_idx') + + # ### end Alembic commands ### diff --git a/api/models/dataset.py b/api/models/dataset.py index 94664bf49a..031bbe4dc7 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -176,6 +176,7 @@ class Document(db.Model): db.PrimaryKeyConstraint('id', name='document_pkey'), db.Index('document_dataset_id_idx', 'dataset_id'), db.Index('document_is_paused_idx', 'is_paused'), + db.Index('document_tenant_idx', 'tenant_id'), ) # initial fields @@ -334,6 +335,7 @@ class DocumentSegment(db.Model): db.Index('document_segment_tenant_dataset_idx', 'dataset_id', 'tenant_id'), db.Index('document_segment_tenant_document_idx', 'document_id', 'tenant_id'), db.Index('document_segment_dataset_node_idx', 'dataset_id', 'index_node_id'), + db.Index('document_segment_tenant_idx', 'tenant_id'), ) # initial fields diff --git a/api/requirements.txt b/api/requirements.txt index 7edd95a893..886d7e42d0 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -12,7 +12,7 @@ gunicorn~=21.2.0 gevent~=23.9.1 langchain==0.0.250 openai~=1.13.3 -tiktoken~=0.5.2 +tiktoken~=0.6.0 psycopg2-binary~=2.9.6 pycryptodome==3.19.1 python-dotenv==1.0.0 @@ -36,7 +36,7 @@ python-docx~=1.1.0 pypdfium2==4.16.0 resend~=0.7.0 pyjwt~=2.8.0 -anthropic~=0.17.0 +anthropic~=0.20.0 newspaper3k==0.2.8 google-api-python-client==2.90.0 wikipedia==1.4.0 diff --git a/api/services/tools_manage_service.py b/api/services/tools_manage_service.py index ff618e5d2b..70c6a44459 100644 --- a/api/services/tools_manage_service.py +++ b/api/services/tools_manage_service.py @@ -138,9 +138,9 @@ class ToolManageService: :return: the list of tool providers """ provider = ToolManager.get_builtin_provider(provider_name) - return [ - v.to_dict() for _, v in (provider.credentials_schema 
or {}).items() - ] + return json.loads(serialize_base_model_array([ + v for _, v in (provider.credentials_schema or {}).items() + ])) @staticmethod def parser_api_schema(schema: str) -> list[ApiBasedToolBundle]: diff --git a/api/tasks/annotation/enable_annotation_reply_task.py b/api/tasks/annotation/enable_annotation_reply_task.py index f3260bbb50..666fa8692f 100644 --- a/api/tasks/annotation/enable_annotation_reply_task.py +++ b/api/tasks/annotation/enable_annotation_reply_task.py @@ -89,7 +89,7 @@ def enable_annotation_reply_task(job_id: str, app_id: str, user_id: str, tenant_ logging.info( click.style('Delete annotation index error: {}'.format(str(e)), fg='red')) - vector.add_texts(documents) + vector.create(documents) db.session.commit() redis_client.setex(enable_app_annotation_job_key, 600, 'completed') end_at = time.perf_counter() diff --git a/sdks/python-client/dify_client/__init__.py b/sdks/python-client/dify_client/__init__.py index 6fa9d190e5..6ef0017fee 100644 --- a/sdks/python-client/dify_client/__init__.py +++ b/sdks/python-client/dify_client/__init__.py @@ -1 +1 @@ -from dify_client.client import ChatClient, CompletionClient, DifyClient +from dify_client.client import ChatClient, CompletionClient, DifyClient \ No newline at end of file diff --git a/web/app/components/app/configuration/config/agent/agent-tools/index.tsx b/web/app/components/app/configuration/config/agent/agent-tools/index.tsx index 95858d9540..b92ff94983 100644 --- a/web/app/components/app/configuration/config/agent/agent-tools/index.tsx +++ b/web/app/components/app/configuration/config/agent/agent-tools/index.tsx @@ -210,6 +210,7 @@ const AgentTools: FC = () => { setting={currentTool?.tool_parameters as any} collection={currentTool?.collection as Collection} isBuiltIn={currentTool?.collection?.type === CollectionType.builtIn} + isModel={currentTool?.collection?.type === CollectionType.model} onSave={handleToolSettingChange} onHide={() => setIsShowSettingTool(false)} />) diff --git 
a/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx b/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx index 378054aae6..9eb2657fcf 100644 --- a/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx +++ b/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx @@ -58,11 +58,16 @@ const SettingBuiltInTool: FC = ({ (async () => { setIsLoading(true) try { - const list = isBuiltIn - ? await fetchBuiltInToolList(collection.name) - : isModel - ? await fetchModelToolList(collection.name) - : await fetchCustomToolList(collection.name) + const list = await new Promise((resolve) => { + (async function () { + if (isModel) + resolve(await fetchModelToolList(collection.name)) + else if (isBuiltIn) + resolve(await fetchBuiltInToolList(collection.name)) + else + resolve(await fetchCustomToolList(collection.name)) + }()) + }) setTools(list) const currTool = list.find(tool => tool.name === toolName) if (currTool) { diff --git a/web/app/components/tools/setting/build-in/config-credentials.tsx b/web/app/components/tools/setting/build-in/config-credentials.tsx index 5daab8e39c..7bb5d1733b 100644 --- a/web/app/components/tools/setting/build-in/config-credentials.tsx +++ b/web/app/components/tools/setting/build-in/config-credentials.tsx @@ -3,7 +3,7 @@ import type { FC } from 'react' import React, { useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import cn from 'classnames' -import { toolCredentialToFormSchemas } from '../../utils/to-form-schema' +import { addDefaultValue, toolCredentialToFormSchemas } from '../../utils/to-form-schema' import type { Collection } from '../../types' import Drawer from '@/app/components/base/drawer-plus' import Button from '@/app/components/base/button' @@ -30,12 +30,15 @@ const ConfigCredential: FC = ({ const { t } = useTranslation() const [credentialSchema, setCredentialSchema] = 
useState(null) const { team_credentials: credentialValue, name: collectionName } = collection + const [tempCredential, setTempCredential] = React.useState(credentialValue) useEffect(() => { fetchBuiltInToolCredentialSchema(collectionName).then((res) => { - setCredentialSchema(toolCredentialToFormSchemas(res)) + const toolCredentialSchemas = toolCredentialToFormSchemas(res) + const defaultCredentials = addDefaultValue(credentialValue, toolCredentialSchemas) + setCredentialSchema(toolCredentialSchemas) + setTempCredential(defaultCredentials) }) }, []) - const [tempCredential, setTempCredential] = React.useState(credentialValue) return (