diff --git a/api/commands.py b/api/commands.py index 376a394d1e..63f691a555 100644 --- a/api/commands.py +++ b/api/commands.py @@ -254,7 +254,7 @@ def migrate_knowledge_vector_database(): for dataset in datasets: total_count = total_count + 1 click.echo(f'Processing the {total_count} dataset {dataset.id}. ' - + f'{create_count} created, ${skipped_count} skipped.') + + f'{create_count} created, {skipped_count} skipped.') try: click.echo('Create dataset vdb index: {}'.format(dataset.id)) if dataset.index_struct_dict: diff --git a/api/config.py b/api/config.py index 9a39b27b97..ed933372a2 100644 --- a/api/config.py +++ b/api/config.py @@ -95,7 +95,7 @@ class Config: # ------------------------ # General Configurations. # ------------------------ - self.CURRENT_VERSION = "0.5.9" + self.CURRENT_VERSION = "0.5.10" self.COMMIT_SHA = get_env('COMMIT_SHA') self.EDITION = "SELF_HOSTED" self.DEPLOY_ENV = get_env('DEPLOY_ENV') diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml index 97116978cd..049ad67a77 100644 --- a/api/core/model_runtime/model_providers/_position.yaml +++ b/api/core/model_runtime/model_providers/_position.yaml @@ -2,6 +2,7 @@ - anthropic - azure_openai - google +- nvidia - cohere - bedrock - togetherai diff --git a/api/core/model_runtime/model_providers/nvidia/__init__.py b/api/core/model_runtime/model_providers/nvidia/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png new file mode 100644 index 0000000000..5a7f42e617 Binary files /dev/null and b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png differ diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg new file mode 100644 index 0000000000..9fc02f9164 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg @@ -0,0 +1,3 @@ + + + diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml new file mode 100644 index 0000000000..78ab4cb93e --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml @@ -0,0 +1,4 @@ +- google/gemma-7b +- meta/llama2-70b +- mistralai/mixtral-8x7b-instruct-v0.1 +- fuyu-8b diff --git a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml new file mode 100644 index 0000000000..49749bba90 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml @@ -0,0 +1,27 @@ +model: fuyu-8b +label: + zh_Hans: fuyu-8b + en_US: fuyu-8b +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 16000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.2 + min: 0.1 + max: 1 + - name: top_p + use_template: top_p + default: 0.7 + min: 0.1 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 1024 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml new file mode 100644 index 0000000000..c50dad4f14 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml @@ -0,0 +1,30 @@ +model: google/gemma-7b +label: + zh_Hans: 
google/gemma-7b + en_US: google/gemma-7b +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml new file mode 100644 index 0000000000..46422cbdb6 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml @@ -0,0 +1,30 @@ +model: meta/llama2-70b +label: + zh_Hans: meta/llama2-70b + en_US: meta/llama2-70b +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py new file mode 100644 index 0000000000..5d05e606b0 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -0,0 +1,247 @@ +import json +from collections.abc import Generator +from typing import Optional, Union + +import requests +from yarl import URL + +from core.model_runtime.entities.llm_entities import LLMMode, LLMResult +from core.model_runtime.entities.message_entities import ( + PromptMessage, + PromptMessageContentType, + PromptMessageFunction, + PromptMessageTool, + UserPromptMessage, +) +from core.model_runtime.errors.invoke import InvokeError +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel +from core.model_runtime.utils import helper + + +class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel): + MODEL_SUFFIX_MAP = { + 'fuyu-8b': 'vlm/adept/fuyu-8b', + 'mistralai/mixtral-8x7b-instruct-v0.1': '', + 'google/gemma-7b': '', + 'meta/llama2-70b': '' + } + + def _invoke(self, model: str, credentials: dict, + prompt_messages: list[PromptMessage], model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, + stream: bool = True, user: Optional[str] = None) \ + -> Union[LLMResult, Generator]: + + self._add_custom_parameters(credentials, model) + prompt_messages = self._transform_prompt_messages(prompt_messages) + stop = [] + user = None + + return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user) + + def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]: + """ + Handle Image transform + """ + for i, p in enumerate(prompt_messages): + if isinstance(p, UserPromptMessage) and isinstance(p.content, list): + content = p.content + content_text = '' + for prompt_content in content: + if prompt_content.type == PromptMessageContentType.TEXT: + content_text += prompt_content.data + else: + content_text += f' ' + + 
prompt_message = UserPromptMessage( + content=content_text + ) + prompt_messages[i] = prompt_message + return prompt_messages + + def validate_credentials(self, model: str, credentials: dict) -> None: + self._add_custom_parameters(credentials, model) + self._validate_credentials(model, credentials) + + def _add_custom_parameters(self, credentials: dict, model: str) -> None: + credentials['mode'] = 'chat' + + if self.MODEL_SUFFIX_MAP[model]: + credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}' + credentials.pop('endpoint_url') + else: + credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1' + + credentials['stream_mode_delimiter'] = '\n' + + def _validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials using requests to ensure compatibility with all providers following OpenAI's API standard. + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + headers = { + 'Content-Type': 'application/json' + } + + api_key = credentials.get('api_key') + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None + if endpoint_url and not endpoint_url.endswith('/'): + endpoint_url += '/' + server_url = credentials['server_url'] if 'server_url' in credentials else None + + # prepare the payload for a simple ping to the model + data = { + 'model': model, + 'max_tokens': 5 + } + + completion_type = LLMMode.value_of(credentials['mode']) + + if completion_type is LLMMode.CHAT: + data['messages'] = [ + { + "role": "user", + "content": "ping" + }, + ] + if 'endpoint_url' in credentials: + endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions') + elif 'server_url' in credentials: + endpoint_url = server_url + elif completion_type is LLMMode.COMPLETION: + data['prompt'] = 'ping' + if 'endpoint_url' in credentials: + endpoint_url = str(URL(endpoint_url) / 'completions') + elif 'server_url' in credentials: + endpoint_url = server_url + else: + raise ValueError("Unsupported completion type for model configuration.") + + # send a post request to validate the credentials + response = requests.post( + endpoint_url, + headers=headers, + json=data, + timeout=(10, 60) + ) + + if response.status_code != 200: + raise CredentialsValidateFailedError( + f'Credentials validation failed with status code {response.status_code}') + + try: + json_result = response.json() + except json.JSONDecodeError as e: + raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error') + except CredentialsValidateFailedError: + raise + except Exception as ex: + raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}') + + def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, + stream: bool = True, \ + user: Optional[str] = None) -> Union[LLMResult, Generator]: + """ + Invoke llm completion model + + :param model: model name + :param credentials: credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + headers = { + 'Content-Type': 'application/json', + 'Accept-Charset': 'utf-8', + } + + api_key = 
credentials.get('api_key') + if api_key: + headers['Authorization'] = f'Bearer {api_key}' + + if stream: + headers['Accept'] = 'text/event-stream' + + endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None + if endpoint_url and not endpoint_url.endswith('/'): + endpoint_url += '/' + server_url = credentials['server_url'] if 'server_url' in credentials else None + + data = { + "model": model, + "stream": stream, + **model_parameters + } + + completion_type = LLMMode.value_of(credentials['mode']) + + if completion_type is LLMMode.CHAT: + if 'endpoint_url' in credentials: + endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions') + elif 'server_url' in credentials: + endpoint_url = server_url + data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] + elif completion_type is LLMMode.COMPLETION: + data['prompt'] = 'ping' + if 'endpoint_url' in credentials: + endpoint_url = str(URL(endpoint_url) / 'completions') + elif 'server_url' in credentials: + endpoint_url = server_url + else: + raise ValueError("Unsupported completion type for model configuration.") + + + # annotate tools with names, descriptions, etc. + function_calling_type = credentials.get('function_calling_type', 'no_call') + formatted_tools = [] + if tools: + if function_calling_type == 'function_call': + data['functions'] = [{ + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters + } for tool in tools] + elif function_calling_type == 'tool_call': + data["tool_choice"] = "auto" + + for tool in tools: + formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool))) + + data["tools"] = formatted_tools + + if stop: + data["stop"] = stop + + if user: + data["user"] = user + + response = requests.post( + endpoint_url, + headers=headers, + json=data, + timeout=(10, 60), + stream=stream + ) + + if response.encoding is None or response.encoding == 'ISO-8859-1': + response.encoding = 'utf-8' + + if not response.ok: + raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}") + + if stream: + return self._handle_generate_stream_response(model, credentials, response, prompt_messages) + + return self._handle_generate_response(model, credentials, response, prompt_messages) diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml new file mode 100644 index 0000000000..fbd8cc268e --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml @@ -0,0 +1,30 @@ +model: mistralai/mixtral-8x7b-instruct-v0.1 +label: + zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1 + en_US: mistralai/mixtral-8x7b-instruct-v0.1 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.py b/api/core/model_runtime/model_providers/nvidia/nvidia.py new file mode 100644 index 0000000000..e83f8badb5 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/nvidia/nvidia.py @@ -0,0 +1,30 @@ +import logging + +from core.model_runtime.entities.model_entities import ModelType +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.model_provider import ModelProvider + +logger = logging.getLogger(__name__) + + +class MistralAIProvider(ModelProvider): + + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + + model_instance.validate_credentials( + model='mistralai/mixtral-8x7b-instruct-v0.1', + credentials=credentials + ) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f'{self.get_provider_schema().provider} credentials validate failed') + raise ex diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml new file mode 100644 index 0000000000..c3c316321e --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml @@ -0,0 +1,30 @@ +provider: nvidia +label: + en_US: NVIDIA +icon_small: + en_US: icon_s_en.svg +icon_large: + en_US: icon_l_en.png +background: "#FFFFFF" +help: + title: + en_US: Get your API Key from NVIDIA + zh_Hans: 从 NVIDIA 获取 API Key + url: + en_US: https://build.nvidia.com/explore/discover +supported_model_types: + - llm + - text-embedding + - rerank +configurate_methods: + - predefined-model +provider_credential_schema: + credential_form_schemas: + - variable: api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py b/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml new file mode 100644 index 0000000000..7703ca21ab --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml @@ -0,0 +1,4 @@ +model: nv-rerank-qa-mistral-4b:1 +model_type: rerank +model_properties: + context_size: 8192 diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py new file mode 100644 index 0000000000..9d33f55bc2 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py @@ -0,0 +1,112 @@ +from math import exp +from typing import Optional + +import requests + +from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.rerank_model import RerankModel + + +class NvidiaRerankModel(RerankModel): + """ + Model class for NVIDIA rerank model. 
+ """ + + def _sigmoid(self, logit: float) -> float: + return 1/(1+exp(-logit)) + + def _invoke(self, model: str, credentials: dict, + query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None, + user: Optional[str] = None) -> RerankResult: + """ + Invoke rerank model + + :param model: model name + :param credentials: model credentials + :param query: search query + :param docs: docs for reranking + :param score_threshold: score threshold + :param top_n: top n documents to return + :param user: unique user id + :return: rerank result + """ + if len(docs) == 0: + return RerankResult(model=model, docs=[]) + + try: + invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking" + + headers = { + "Authorization": f"Bearer {credentials.get('api_key')}", + "Accept": "application/json", + } + payload = { + "model": model, + "query": {"text": query}, + "passages": [{"text": doc} for doc in docs], + } + + session = requests.Session() + response = session.post(invoke_url, headers=headers, json=payload) + response.raise_for_status() + results = response.json() + + rerank_documents = [] + for result in results['rankings']: + index = result['index'] + logit = result['logit'] + rerank_document = RerankDocument( + index=index, + text=docs[index], + score=self._sigmoid(logit), + ) + + rerank_documents.append(rerank_document) + + return RerankResult(model=model, docs=rerank_documents) + except requests.HTTPError as e: + raise InvokeServerUnavailableError(str(e)) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self._invoke( + model=model, + credentials=credentials, + query="What is the GPU memory bandwidth of H100 SXM?", + docs=[ + "Example doc 1", + "Example doc 2", + "Example doc 3", + ], + ) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + """ + return { + InvokeConnectionError: [requests.ConnectionError], + InvokeServerUnavailableError: [requests.HTTPError], + InvokeRateLimitError: [], + InvokeAuthorizationError: [requests.HTTPError], + InvokeBadRequestError: [requests.RequestException] + } diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml new file mode 100644 index 0000000000..a9b5e25c3c --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml @@ -0,0 +1,5 @@ +model: NV-Embed-QA +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py new file mode 100644 index 0000000000..a2adef400d --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py @@ -0,0 +1,172 @@ +import time +from json import JSONDecodeError, dumps +from typing import Optional + +from requests import post + +from core.model_runtime.entities.model_entities import 
PriceType +from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel + + +class NvidiaTextEmbeddingModel(TextEmbeddingModel): + """ + Model class for Nvidia text embedding model. + """ + api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings' + models: list[str] = ['NV-Embed-QA'] + + def _invoke(self, model: str, credentials: dict, + texts: list[str], user: Optional[str] = None) \ + -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :return: embeddings result + """ + api_key = credentials['api_key'] + if model not in self.models: + raise InvokeBadRequestError('Invalid model name') + if not api_key: + raise CredentialsValidateFailedError('api_key is required') + url = self.api_base + headers = { + 'Authorization': 'Bearer ' + api_key, + 'Content-Type': 'application/json' + } + + data = { + 'model': model, + 'input': texts[0], + 'input_type': 'query' + } + + try: + response = post(url, headers=headers, data=dumps(data)) + except Exception as e: + raise InvokeConnectionError(str(e)) + + if response.status_code != 200: + try: + resp = response.json() + msg = resp['detail'] + if response.status_code == 401: + raise InvokeAuthorizationError(msg) + elif response.status_code == 429: + raise InvokeRateLimitError(msg) + elif response.status_code == 500: + raise InvokeServerUnavailableError(msg) + else: + raise InvokeError(msg) + except JSONDecodeError as e: + raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}") + + try: + resp = response.json() + embeddings = resp['data'] + usage = resp['usage'] + except Exception as e: + raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}") + + usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens']) + + result = TextEmbeddingResult( + model=model, + embeddings=[[ + float(data) for data in x['embedding'] + ] for x in embeddings], + usage=usage + ) + + return result + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + num_tokens = 0 + for text in texts: + # use JinaTokenizer to get num tokens + num_tokens += self._get_num_tokens_by_gpt2(text) + return num_tokens + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self._invoke(model=model, credentials=credentials, texts=['ping']) + except InvokeAuthorizationError: + raise CredentialsValidateFailedError('Invalid api key') + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + return { + InvokeConnectionError: [ + InvokeConnectionError + ], + InvokeServerUnavailableError: [ + InvokeServerUnavailableError + ], 
+ InvokeRateLimitError: [ + InvokeRateLimitError + ], + InvokeAuthorizationError: [ + InvokeAuthorizationError + ], + InvokeBadRequestError: [ + KeyError + ] + } + + def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param tokens: input tokens + :return: usage + """ + # get input price info + input_price_info = self.get_price( + model=model, + credentials=credentials, + price_type=PriceType.INPUT, + tokens=tokens + ) + + # transform usage + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=tokens, + unit_price=input_price_info.unit_price, + price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at + ) + + return usage diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml index 4d4148aa91..429c646b77 100644 --- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml +++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml @@ -9,18 +9,33 @@ model_properties: mode: chat context_size: 4096 parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。 + en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is. - name: max_tokens use_template: max_tokens type: int default: 512 min: 1 - max: 4096 - - name: temperature - use_template: temperature + max: 4000 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p type: float - default: 0.7 - min: 0 - max: 2 + default: 0.8 + min: 0.01 + max: 1.00 + help: + zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。 + en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature. pricing: input: '0.0025' output: '0.0025' diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml index 4fbe84e9b7..d0e181d007 100644 --- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml +++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml @@ -9,18 +9,33 @@ model_properties: mode: chat context_size: 200000 parameter_rules: - - name: max_tokens - use_template: max_tokens - type: int - default: 1024 - min: 1 - max: 200000 - name: temperature use_template: temperature type: float - default: 0.7 - min: 0 - max: 2 + default: 0.6 + min: 0.0 + max: 2.0 + help: + zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。 + en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is. + - name: max_tokens + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 199950 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. 
If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + type: float + default: 0.9 + min: 0.01 + max: 1.00 + help: + zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。 + en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature. pricing: input: '0.012' output: '0.012' diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml index 6195051f16..a6abcc401f 100644 --- a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml @@ -9,18 +9,33 @@ model_properties: mode: chat context_size: 4096 parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。 + en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is. - name: max_tokens use_template: max_tokens type: int default: 512 min: 1 - max: 4096 - - name: temperature - use_template: temperature + max: 4000 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p type: float - default: 0.7 - min: 0 - max: 2 + default: 0.8 + min: 0.01 + max: 1.00 + help: + zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。 + en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature. pricing: input: '0.01' output: '0.03' diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index 3f0467ee24..5fbc319fd6 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -45,11 +45,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor): # delete Spliter character page_content = document_node.page_content if page_content.startswith(".") or page_content.startswith("。"): - page_content = page_content[1:] + page_content = page_content[1:].strip() else: page_content = page_content - document_node.page_content = page_content - split_documents.append(document_node) + if len(page_content) > 0: + document_node.page_content = page_content + split_documents.append(document_node) all_documents.extend(split_documents) return all_documents diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index f066582ac8..d39a719655 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3.1' services: # API service api: - image: langgenius/dify-api:0.5.9 + image: langgenius/dify-api:0.5.10 restart: always environment: # Startup mode, 'api' starts the API server. @@ -138,7 +138,7 @@ services: # worker service # The Celery worker for processing the queue. 
worker: - image: langgenius/dify-api:0.5.9 + image: langgenius/dify-api:0.5.10 restart: always environment: # Startup mode, 'worker' starts the Celery worker for processing the queue. @@ -209,7 +209,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.5.9 + image: langgenius/dify-web:0.5.10 restart: always environment: EDITION: SELF_HOSTED diff --git a/web/package.json b/web/package.json index fc466f42b3..513efdc657 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "dify-web", - "version": "0.5.9", + "version": "0.5.10", "private": true, "scripts": { "dev": "next dev",
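
Note on the NVIDIA provider's endpoint routing added in this diff: NVIDIALargeLanguageModel._add_custom_parameters switches between a per-model server_url (models with an entry in MODEL_SUFFIX_MAP, such as fuyu-8b, which live under https://ai.api.nvidia.com/v1/<suffix>) and the shared OpenAI-compatible endpoint_url (https://integrate.api.nvidia.com/v1) for the remaining text models. The sketch below only illustrates that routing behaviour; the standalone resolve_nvidia_endpoint helper name and the pop default are assumptions for demonstration and are not part of the diff itself.

    # Illustrative sketch, not part of the diff: mirrors the logic of
    # NVIDIALargeLanguageModel._add_custom_parameters. Helper name and
    # standalone framing are hypothetical.
    MODEL_SUFFIX_MAP = {
        'fuyu-8b': 'vlm/adept/fuyu-8b',
        'mistralai/mixtral-8x7b-instruct-v0.1': '',
        'google/gemma-7b': '',
        'meta/llama2-70b': '',
    }

    def resolve_nvidia_endpoint(model: str, credentials: dict) -> dict:
        """Point the credentials at the right NVIDIA API base for `model`."""
        credentials['mode'] = 'chat'
        suffix = MODEL_SUFFIX_MAP[model]
        if suffix:
            # Models with a dedicated suffix (e.g. the fuyu-8b vision model)
            # use a model-specific server_url instead of the shared endpoint_url.
            credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{suffix}'
            credentials.pop('endpoint_url', None)
        else:
            # Text models go through the OpenAI-compatible integrate endpoint.
            credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'
        credentials['stream_mode_delimiter'] = '\n'
        return credentials

    # Example: resolve_nvidia_endpoint('fuyu-8b', {'api_key': '...'}) yields
    # server_url https://ai.api.nvidia.com/v1/vlm/adept/fuyu-8b, while
    # resolve_nvidia_endpoint('meta/llama2-70b', {'api_key': '...'}) yields
    # the shared endpoint_url https://integrate.api.nvidia.com/v1.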