diff --git a/api/commands.py b/api/commands.py
index 376a394d1e..63f691a555 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -254,7 +254,7 @@ def migrate_knowledge_vector_database():
for dataset in datasets:
total_count = total_count + 1
click.echo(f'Processing the {total_count} dataset {dataset.id}. '
- + f'{create_count} created, ${skipped_count} skipped.')
+ + f'{create_count} created, {skipped_count} skipped.')
try:
click.echo('Create dataset vdb index: {}'.format(dataset.id))
if dataset.index_struct_dict:
diff --git a/api/config.py b/api/config.py
index 9a39b27b97..ed933372a2 100644
--- a/api/config.py
+++ b/api/config.py
@@ -95,7 +95,7 @@ class Config:
# ------------------------
# General Configurations.
# ------------------------
- self.CURRENT_VERSION = "0.5.9"
+ self.CURRENT_VERSION = "0.5.10"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')
diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 97116978cd..049ad67a77 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -2,6 +2,7 @@
- anthropic
- azure_openai
- google
+- nvidia
- cohere
- bedrock
- togetherai
diff --git a/api/core/model_runtime/model_providers/nvidia/__init__.py b/api/core/model_runtime/model_providers/nvidia/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png
new file mode 100644
index 0000000000..5a7f42e617
Binary files /dev/null and b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png differ
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
new file mode 100644
index 0000000000..9fc02f9164
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
new file mode 100644
index 0000000000..78ab4cb93e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -0,0 +1,4 @@
+- google/gemma-7b
+- meta/llama2-70b
+- mistralai/mixtral-8x7b-instruct-v0.1
+- fuyu-8b
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
new file mode 100644
index 0000000000..49749bba90
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
@@ -0,0 +1,27 @@
+model: fuyu-8b
+label:
+ zh_Hans: fuyu-8b
+ en_US: fuyu-8b
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.2
+ min: 0.1
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ min: 0.1
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
new file mode 100644
index 0000000000..c50dad4f14
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-7b
+label:
+ zh_Hans: google/gemma-7b
+ en_US: google/gemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
new file mode 100644
index 0000000000..46422cbdb6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
@@ -0,0 +1,30 @@
+model: meta/llama2-70b
+label:
+ zh_Hans: meta/llama2-70b
+ en_US: meta/llama2-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
new file mode 100644
index 0000000000..5d05e606b0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -0,0 +1,247 @@
+import json
+from collections.abc import Generator
+from typing import Optional, Union
+
+import requests
+from yarl import URL
+
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
+from core.model_runtime.entities.message_entities import (
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageFunction,
+ PromptMessageTool,
+ UserPromptMessage,
+)
+from core.model_runtime.errors.invoke import InvokeError
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+from core.model_runtime.utils import helper
+
+
+class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
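+    # models mapped to a non-empty suffix are served from the dedicated
+    # https://ai.api.nvidia.com/v1/<suffix> endpoint; the rest go through the
+    # OpenAI-compatible https://integrate.api.nvidia.com/v1 endpoint
+    # (see _add_custom_parameters)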
+ MODEL_SUFFIX_MAP = {
+ 'fuyu-8b': 'vlm/adept/fuyu-8b',
+ 'mistralai/mixtral-8x7b-instruct-v0.1': '',
+ 'google/gemma-7b': '',
+ 'meta/llama2-70b': ''
+ }
+
+ def _invoke(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, user: Optional[str] = None) \
+ -> Union[LLMResult, Generator]:
+
+ self._add_custom_parameters(credentials, model)
+ prompt_messages = self._transform_prompt_messages(prompt_messages)
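+        # stop words and user id are not forwarded to the NVIDIA endpoints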
+ stop = []
+ user = None
+
+ return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
+ def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
+ """
+        Collapse multimodal user messages into plain text, embedding image data as inline <img> tags.
+ """
+ for i, p in enumerate(prompt_messages):
+ if isinstance(p, UserPromptMessage) and isinstance(p.content, list):
+ content = p.content
+ content_text = ''
+ for prompt_content in content:
+ if prompt_content.type == PromptMessageContentType.TEXT:
+ content_text += prompt_content.data
+ else:
+                        content_text += f' <img src="{prompt_content.data}" />'
+
+ prompt_message = UserPromptMessage(
+ content=content_text
+ )
+ prompt_messages[i] = prompt_message
+ return prompt_messages
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ self._add_custom_parameters(credentials, model)
+ self._validate_credentials(model, credentials)
+
+ def _add_custom_parameters(self, credentials: dict, model: str) -> None:
+ credentials['mode'] = 'chat'
+
+ if self.MODEL_SUFFIX_MAP[model]:
+ credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}'
+            credentials.pop('endpoint_url', None)
+ else:
+ credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'
+
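+        # streamed chunks are delimited by a single newline rather than the usual '\n\n'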
+ credentials['stream_mode_delimiter'] = '\n'
+
+ def _validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+        Validate model credentials with a direct HTTP request against the configured NVIDIA endpoint.
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ # prepare the payload for a simple ping to the model
+ data = {
+ 'model': model,
+ 'max_tokens': 5
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
+ if completion_type is LLMMode.CHAT:
+ data['messages'] = [
+ {
+ "role": "user",
+ "content": "ping"
+ },
+ ]
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ elif completion_type is LLMMode.COMPLETION:
+ data['prompt'] = 'ping'
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+ # send a post request to validate the credentials
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60)
+ )
+
+ if response.status_code != 200:
+ raise CredentialsValidateFailedError(
+ f'Credentials validation failed with status code {response.status_code}')
+
+ try:
+ json_result = response.json()
+ except json.JSONDecodeError as e:
+ raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
+ except CredentialsValidateFailedError:
+ raise
+ except Exception as ex:
+ raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}')
+
+ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                  stream: bool = True,
+ user: Optional[str] = None) -> Union[LLMResult, Generator]:
+ """
+        Invoke the model via a direct HTTP request (chat or completion mode)
+
+ :param model: model name
+ :param credentials: credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept-Charset': 'utf-8',
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers['Authorization'] = f'Bearer {api_key}'
+
+ if stream:
+ headers['Accept'] = 'text/event-stream'
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ data = {
+ "model": model,
+ "stream": stream,
+ **model_parameters
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
+ if completion_type is LLMMode.CHAT:
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
+ elif completion_type is LLMMode.COMPLETION:
+            data['prompt'] = prompt_messages[0].content
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+
+ # annotate tools with names, descriptions, etc.
+ function_calling_type = credentials.get('function_calling_type', 'no_call')
+ formatted_tools = []
+ if tools:
+ if function_calling_type == 'function_call':
+ data['functions'] = [{
+ "name": tool.name,
+ "description": tool.description,
+ "parameters": tool.parameters
+ } for tool in tools]
+ elif function_calling_type == 'tool_call':
+ data["tool_choice"] = "auto"
+
+ for tool in tools:
+ formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))
+
+ data["tools"] = formatted_tools
+
+ if stop:
+ data["stop"] = stop
+
+ if user:
+ data["user"] = user
+
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60),
+ stream=stream
+ )
+
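+        # requests falls back to ISO-8859-1 when no charset is returned; force UTF-8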
+ if response.encoding is None or response.encoding == 'ISO-8859-1':
+ response.encoding = 'utf-8'
+
+ if not response.ok:
+ raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}")
+
+ if stream:
+ return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
+
+ return self._handle_generate_response(model, credentials, response, prompt_messages)
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..fbd8cc268e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/mixtral-8x7b-instruct-v0.1
+label:
+ zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
+ en_US: mistralai/mixtral-8x7b-instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.py b/api/core/model_runtime/model_providers/nvidia/nvidia.py
new file mode 100644
index 0000000000..e83f8badb5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.py
@@ -0,0 +1,30 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NVIDIAProvider(ModelProvider):
+
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+        If validation fails, raise an exception.
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
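+            # probe the credentials with one of the predefined chat models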
+ model_instance = self.get_model_instance(ModelType.LLM)
+
+ model_instance.validate_credentials(
+ model='mistralai/mixtral-8x7b-instruct-v0.1',
+ credentials=credentials
+ )
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
new file mode 100644
index 0000000000..c3c316321e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
@@ -0,0 +1,30 @@
+provider: nvidia
+label:
+ en_US: NVIDIA
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.png
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from NVIDIA
+ zh_Hans: 从 NVIDIA 获取 API Key
+ url:
+ en_US: https://build.nvidia.com/explore/discover
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py b/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
new file mode 100644
index 0000000000..7703ca21ab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
@@ -0,0 +1,4 @@
+model: nv-rerank-qa-mistral-4b:1
+model_type: rerank
+model_properties:
+ context_size: 8192
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
new file mode 100644
index 0000000000..9d33f55bc2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
@@ -0,0 +1,112 @@
+from math import exp
+from typing import Optional
+
+import requests
+
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.rerank_model import RerankModel
+
+
+class NvidiaRerankModel(RerankModel):
+ """
+ Model class for NVIDIA rerank model.
+ """
+
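+    # the reranking endpoint returns raw logits; map them to (0, 1) relevance scores with a sigmoid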
+ def _sigmoid(self, logit: float) -> float:
+ return 1/(1+exp(-logit))
+
+ def _invoke(self, model: str, credentials: dict,
+ query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
+ user: Optional[str] = None) -> RerankResult:
+ """
+ Invoke rerank model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param query: search query
+ :param docs: docs for reranking
+ :param score_threshold: score threshold
+ :param top_n: top n documents to return
+ :param user: unique user id
+ :return: rerank result
+ """
+ if len(docs) == 0:
+ return RerankResult(model=model, docs=[])
+
+ try:
+ invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"
+
+ headers = {
+ "Authorization": f"Bearer {credentials.get('api_key')}",
+ "Accept": "application/json",
+ }
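+            # the query and each passage are wrapped as {"text": ...} objects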
+ payload = {
+ "model": model,
+ "query": {"text": query},
+ "passages": [{"text": doc} for doc in docs],
+ }
+
+ session = requests.Session()
+ response = session.post(invoke_url, headers=headers, json=payload)
+ response.raise_for_status()
+ results = response.json()
+
+ rerank_documents = []
+ for result in results['rankings']:
+ index = result['index']
+ logit = result['logit']
+ rerank_document = RerankDocument(
+ index=index,
+ text=docs[index],
+ score=self._sigmoid(logit),
+ )
+
+ rerank_documents.append(rerank_document)
+
+ return RerankResult(model=model, docs=rerank_documents)
+ except requests.HTTPError as e:
+ raise InvokeServerUnavailableError(str(e))
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(
+ model=model,
+ credentials=credentials,
+ query="What is the GPU memory bandwidth of H100 SXM?",
+ docs=[
+ "Example doc 1",
+ "Example doc 2",
+ "Example doc 3",
+ ],
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ """
+ return {
+ InvokeConnectionError: [requests.ConnectionError],
+ InvokeServerUnavailableError: [requests.HTTPError],
+ InvokeRateLimitError: [],
+ InvokeAuthorizationError: [requests.HTTPError],
+ InvokeBadRequestError: [requests.RequestException]
+ }
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
new file mode 100644
index 0000000000..a9b5e25c3c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
@@ -0,0 +1,5 @@
+model: NV-Embed-QA
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..a2adef400d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -0,0 +1,172 @@
+import time
+from json import JSONDecodeError, dumps
+from typing import Optional
+
+from requests import post
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class NvidiaTextEmbeddingModel(TextEmbeddingModel):
+ """
+ Model class for Nvidia text embedding model.
+ """
+ api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings'
+ models: list[str] = ['NV-Embed-QA']
+
+ def _invoke(self, model: str, credentials: dict,
+ texts: list[str], user: Optional[str] = None) \
+ -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :return: embeddings result
+ """
+ api_key = credentials['api_key']
+ if model not in self.models:
+ raise InvokeBadRequestError('Invalid model name')
+ if not api_key:
+ raise CredentialsValidateFailedError('api_key is required')
+ url = self.api_base
+ headers = {
+ 'Authorization': 'Bearer ' + api_key,
+ 'Content-Type': 'application/json'
+ }
+
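+        # only the first text is embedded; the endpoint is called with a single input per request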
+ data = {
+ 'model': model,
+ 'input': texts[0],
+ 'input_type': 'query'
+ }
+
+ try:
+ response = post(url, headers=headers, data=dumps(data))
+ except Exception as e:
+ raise InvokeConnectionError(str(e))
+
+ if response.status_code != 200:
+ try:
+ resp = response.json()
+ msg = resp['detail']
+ if response.status_code == 401:
+ raise InvokeAuthorizationError(msg)
+ elif response.status_code == 429:
+ raise InvokeRateLimitError(msg)
+ elif response.status_code == 500:
+ raise InvokeServerUnavailableError(msg)
+ else:
+ raise InvokeError(msg)
+ except JSONDecodeError as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ try:
+ resp = response.json()
+ embeddings = resp['data']
+ usage = resp['usage']
+ except Exception as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])
+
+ result = TextEmbeddingResult(
+ model=model,
+ embeddings=[[
+ float(data) for data in x['embedding']
+ ] for x in embeddings],
+ usage=usage
+ )
+
+ return result
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ num_tokens = 0
+ for text in texts:
+            # use the GPT-2 tokenizer to estimate the number of tokens
+ num_tokens += self._get_num_tokens_by_gpt2(text)
+ return num_tokens
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(model=model, credentials=credentials, texts=['ping'])
+ except InvokeAuthorizationError:
+ raise CredentialsValidateFailedError('Invalid api key')
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ return {
+ InvokeConnectionError: [
+ InvokeConnectionError
+ ],
+ InvokeServerUnavailableError: [
+ InvokeServerUnavailableError
+ ],
+ InvokeRateLimitError: [
+ InvokeRateLimitError
+ ],
+ InvokeAuthorizationError: [
+ InvokeAuthorizationError
+ ],
+ InvokeBadRequestError: [
+ KeyError
+ ]
+ }
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
index 4d4148aa91..429c646b77 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 4096
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
- max: 4096
- - name: temperature
- use_template: temperature
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.0025'
output: '0.0025'
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
index 4fbe84e9b7..d0e181d007 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 200000
parameter_rules:
- - name: max_tokens
- use_template: max_tokens
- type: int
- default: 1024
- min: 1
- max: 200000
- name: temperature
use_template: temperature
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.6
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 4096
+ min: 1
+ max: 199950
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.012'
output: '0.012'
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
index 6195051f16..a6abcc401f 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 4096
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
- max: 4096
- - name: temperature
- use_template: temperature
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.01'
output: '0.03'
diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py
index 3f0467ee24..5fbc319fd6 100644
--- a/api/core/rag/index_processor/processor/paragraph_index_processor.py
+++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py
@@ -45,11 +45,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
# delete Spliter character
page_content = document_node.page_content
if page_content.startswith(".") or page_content.startswith("。"):
- page_content = page_content[1:]
+ page_content = page_content[1:].strip()
else:
page_content = page_content
- document_node.page_content = page_content
- split_documents.append(document_node)
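+                    # drop nodes that end up empty once the leading separator is stripped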
+ if len(page_content) > 0:
+ document_node.page_content = page_content
+ split_documents.append(document_node)
all_documents.extend(split_documents)
return all_documents
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index f066582ac8..d39a719655 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -2,7 +2,7 @@ version: '3.1'
services:
# API service
api:
- image: langgenius/dify-api:0.5.9
+ image: langgenius/dify-api:0.5.10
restart: always
environment:
# Startup mode, 'api' starts the API server.
@@ -138,7 +138,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
- image: langgenius/dify-api:0.5.9
+ image: langgenius/dify-api:0.5.10
restart: always
environment:
# Startup mode, 'worker' starts the Celery worker for processing the queue.
@@ -209,7 +209,7 @@ services:
# Frontend web application.
web:
- image: langgenius/dify-web:0.5.9
+ image: langgenius/dify-web:0.5.10
restart: always
environment:
EDITION: SELF_HOSTED
diff --git a/web/package.json b/web/package.json
index fc466f42b3..513efdc657 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
{
"name": "dify-web",
- "version": "0.5.9",
+ "version": "0.5.10",
"private": true,
"scripts": {
"dev": "next dev",