diff --git a/api/commands.py b/api/commands.py
index 376a394d1e..63f691a555 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -254,7 +254,7 @@ def migrate_knowledge_vector_database():
for dataset in datasets:
total_count = total_count + 1
click.echo(f'Processing the {total_count} dataset {dataset.id}. '
- + f'{create_count} created, ${skipped_count} skipped.')
+ + f'{create_count} created, {skipped_count} skipped.')
try:
click.echo('Create dataset vdb index: {}'.format(dataset.id))
if dataset.index_struct_dict:
diff --git a/api/config.py b/api/config.py
index 9a39b27b97..ed933372a2 100644
--- a/api/config.py
+++ b/api/config.py
@@ -95,7 +95,7 @@ class Config:
# ------------------------
# General Configurations.
# ------------------------
- self.CURRENT_VERSION = "0.5.9"
+ self.CURRENT_VERSION = "0.5.10"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')
diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 97116978cd..049ad67a77 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -2,6 +2,7 @@
- anthropic
- azure_openai
- google
+- nvidia
- cohere
- bedrock
- togetherai
diff --git a/api/core/model_runtime/model_providers/nvidia/__init__.py b/api/core/model_runtime/model_providers/nvidia/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png
new file mode 100644
index 0000000000..5a7f42e617
Binary files /dev/null and b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png differ
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
new file mode 100644
index 0000000000..9fc02f9164
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
new file mode 100644
index 0000000000..78ab4cb93e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -0,0 +1,4 @@
+- google/gemma-7b
+- meta/llama2-70b
+- mistralai/mixtral-8x7b-instruct-v0.1
+- fuyu-8b
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
new file mode 100644
index 0000000000..49749bba90
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
@@ -0,0 +1,27 @@
+model: fuyu-8b
+label:
+ zh_Hans: fuyu-8b
+ en_US: fuyu-8b
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.2
+ min: 0.1
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ min: 0.1
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
new file mode 100644
index 0000000000..c50dad4f14
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-7b
+label:
+ zh_Hans: google/gemma-7b
+ en_US: google/gemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
new file mode 100644
index 0000000000..46422cbdb6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
@@ -0,0 +1,30 @@
+model: meta/llama2-70b
+label:
+ zh_Hans: meta/llama2-70b
+ en_US: meta/llama2-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
new file mode 100644
index 0000000000..5d05e606b0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -0,0 +1,247 @@
+import json
+from collections.abc import Generator
+from typing import Optional, Union
+
+import requests
+from yarl import URL
+
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
+from core.model_runtime.entities.message_entities import (
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageFunction,
+ PromptMessageTool,
+ UserPromptMessage,
+)
+from core.model_runtime.errors.invoke import InvokeError
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+from core.model_runtime.utils import helper
+
+
+class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
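+    # models mapped to a non-empty suffix are served from the dedicated
+    # https://ai.api.nvidia.com/v1/<suffix> endpoint; the rest go through the
+    # OpenAI-compatible https://integrate.api.nvidia.com/v1 endpoint
+    # (see _add_custom_parameters)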
+ MODEL_SUFFIX_MAP = {
+ 'fuyu-8b': 'vlm/adept/fuyu-8b',
+ 'mistralai/mixtral-8x7b-instruct-v0.1': '',
+ 'google/gemma-7b': '',
+ 'meta/llama2-70b': ''
+ }
+
+ def _invoke(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, user: Optional[str] = None) \
+ -> Union[LLMResult, Generator]:
+
+ self._add_custom_parameters(credentials, model)
+ prompt_messages = self._transform_prompt_messages(prompt_messages)
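+        # stop words and user id are not forwarded to the NVIDIA endpoints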
+ stop = []
+ user = None
+
+ return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
+ def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
+ """
+        Collapse multimodal user messages into plain text, embedding image data as inline <img> tags.
+ """
+ for i, p in enumerate(prompt_messages):
+ if isinstance(p, UserPromptMessage) and isinstance(p.content, list):
+ content = p.content
+ content_text = ''
+ for prompt_content in content:
+ if prompt_content.type == PromptMessageContentType.TEXT:
+ content_text += prompt_content.data
+ else:
+                        content_text += f' <img src="{prompt_content.data}" />'
+
+ prompt_message = UserPromptMessage(
+ content=content_text
+ )
+ prompt_messages[i] = prompt_message
+ return prompt_messages
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ self._add_custom_parameters(credentials, model)
+ self._validate_credentials(model, credentials)
+
+ def _add_custom_parameters(self, credentials: dict, model: str) -> None:
+ credentials['mode'] = 'chat'
+
+ if self.MODEL_SUFFIX_MAP[model]:
+ credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}'
+            credentials.pop('endpoint_url', None)
+ else:
+ credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'
+
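+        # streamed chunks are delimited by a single newline rather than the usual '\n\n'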
+ credentials['stream_mode_delimiter'] = '\n'
+
+ def _validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+        Validate model credentials with a direct HTTP request against the configured NVIDIA endpoint.
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ # prepare the payload for a simple ping to the model
+ data = {
+ 'model': model,
+ 'max_tokens': 5
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
+ if completion_type is LLMMode.CHAT:
+ data['messages'] = [
+ {
+ "role": "user",
+ "content": "ping"
+ },
+ ]
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ elif completion_type is LLMMode.COMPLETION:
+ data['prompt'] = 'ping'
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+ # send a post request to validate the credentials
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60)
+ )
+
+ if response.status_code != 200:
+ raise CredentialsValidateFailedError(
+ f'Credentials validation failed with status code {response.status_code}')
+
+ try:
+ json_result = response.json()
+ except json.JSONDecodeError as e:
+ raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
+ except CredentialsValidateFailedError:
+ raise
+ except Exception as ex:
+ raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}')
+
+ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                  stream: bool = True,
+ user: Optional[str] = None) -> Union[LLMResult, Generator]:
+ """
+        Invoke the model via a direct HTTP request (chat or completion mode)
+
+ :param model: model name
+ :param credentials: credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept-Charset': 'utf-8',
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers['Authorization'] = f'Bearer {api_key}'
+
+ if stream:
+ headers['Accept'] = 'text/event-stream'
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ data = {
+ "model": model,
+ "stream": stream,
+ **model_parameters
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
+ if completion_type is LLMMode.CHAT:
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
+ elif completion_type is LLMMode.COMPLETION:
+            data['prompt'] = prompt_messages[0].content
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+
+ # annotate tools with names, descriptions, etc.
+ function_calling_type = credentials.get('function_calling_type', 'no_call')
+ formatted_tools = []
+ if tools:
+ if function_calling_type == 'function_call':
+ data['functions'] = [{
+ "name": tool.name,
+ "description": tool.description,
+ "parameters": tool.parameters
+ } for tool in tools]
+ elif function_calling_type == 'tool_call':
+ data["tool_choice"] = "auto"
+
+ for tool in tools:
+ formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))
+
+ data["tools"] = formatted_tools
+
+ if stop:
+ data["stop"] = stop
+
+ if user:
+ data["user"] = user
+
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60),
+ stream=stream
+ )
+
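+        # requests falls back to ISO-8859-1 when no charset is returned; force UTF-8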
+ if response.encoding is None or response.encoding == 'ISO-8859-1':
+ response.encoding = 'utf-8'
+
+ if not response.ok:
+ raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}")
+
+ if stream:
+ return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
+
+ return self._handle_generate_response(model, credentials, response, prompt_messages)
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..fbd8cc268e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/mixtral-8x7b-instruct-v0.1
+label:
+ zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
+ en_US: mistralai/mixtral-8x7b-instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.py b/api/core/model_runtime/model_providers/nvidia/nvidia.py
new file mode 100644
index 0000000000..e83f8badb5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.py
@@ -0,0 +1,30 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NVIDIAProvider(ModelProvider):
+
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+        If validation fails, raise an exception.
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
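+            # probe the credentials with one of the predefined chat models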
+ model_instance = self.get_model_instance(ModelType.LLM)
+
+ model_instance.validate_credentials(
+ model='mistralai/mixtral-8x7b-instruct-v0.1',
+ credentials=credentials
+ )
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
new file mode 100644
index 0000000000..c3c316321e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
@@ -0,0 +1,30 @@
+provider: nvidia
+label:
+ en_US: NVIDIA
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.png
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from NVIDIA
+ zh_Hans: 从 NVIDIA 获取 API Key
+ url:
+ en_US: https://build.nvidia.com/explore/discover
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py b/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
new file mode 100644
index 0000000000..7703ca21ab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
@@ -0,0 +1,4 @@
+model: nv-rerank-qa-mistral-4b:1
+model_type: rerank
+model_properties:
+ context_size: 8192
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
new file mode 100644
index 0000000000..9d33f55bc2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
@@ -0,0 +1,112 @@
+from math import exp
+from typing import Optional
+
+import requests
+
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.rerank_model import RerankModel
+
+
+class NvidiaRerankModel(RerankModel):
+ """
+ Model class for NVIDIA rerank model.
+ """
+
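+    # the reranking endpoint returns raw logits; map them to (0, 1) relevance scores with a sigmoid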
+ def _sigmoid(self, logit: float) -> float:
+ return 1/(1+exp(-logit))
+
+ def _invoke(self, model: str, credentials: dict,
+ query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
+ user: Optional[str] = None) -> RerankResult:
+ """
+ Invoke rerank model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param query: search query
+ :param docs: docs for reranking
+ :param score_threshold: score threshold
+ :param top_n: top n documents to return
+ :param user: unique user id
+ :return: rerank result
+ """
+ if len(docs) == 0:
+ return RerankResult(model=model, docs=[])
+
+ try:
+ invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"
+
+ headers = {
+ "Authorization": f"Bearer {credentials.get('api_key')}",
+ "Accept": "application/json",
+ }
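+            # the query and each passage are wrapped as {"text": ...} objects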
+ payload = {
+ "model": model,
+ "query": {"text": query},
+ "passages": [{"text": doc} for doc in docs],
+ }
+
+ session = requests.Session()
+ response = session.post(invoke_url, headers=headers, json=payload)
+ response.raise_for_status()
+ results = response.json()
+
+ rerank_documents = []
+ for result in results['rankings']:
+ index = result['index']
+ logit = result['logit']
+ rerank_document = RerankDocument(
+ index=index,
+ text=docs[index],
+ score=self._sigmoid(logit),
+ )
+
+ rerank_documents.append(rerank_document)
+
+ return RerankResult(model=model, docs=rerank_documents)
+ except requests.HTTPError as e:
+ raise InvokeServerUnavailableError(str(e))
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(
+ model=model,
+ credentials=credentials,
+ query="What is the GPU memory bandwidth of H100 SXM?",
+ docs=[
+ "Example doc 1",
+ "Example doc 2",
+ "Example doc 3",
+ ],
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ """
+ return {
+ InvokeConnectionError: [requests.ConnectionError],
+ InvokeServerUnavailableError: [requests.HTTPError],
+ InvokeRateLimitError: [],
+ InvokeAuthorizationError: [requests.HTTPError],
+ InvokeBadRequestError: [requests.RequestException]
+ }
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
new file mode 100644
index 0000000000..a9b5e25c3c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
@@ -0,0 +1,5 @@
+model: NV-Embed-QA
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..a2adef400d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -0,0 +1,172 @@
+import time
+from json import JSONDecodeError, dumps
+from typing import Optional
+
+from requests import post
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class NvidiaTextEmbeddingModel(TextEmbeddingModel):
+ """
+ Model class for Nvidia text embedding model.
+ """
+ api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings'
+ models: list[str] = ['NV-Embed-QA']
+
+ def _invoke(self, model: str, credentials: dict,
+ texts: list[str], user: Optional[str] = None) \
+ -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :return: embeddings result
+ """
+ api_key = credentials['api_key']
+ if model not in self.models:
+ raise InvokeBadRequestError('Invalid model name')
+ if not api_key:
+ raise CredentialsValidateFailedError('api_key is required')
+ url = self.api_base
+ headers = {
+ 'Authorization': 'Bearer ' + api_key,
+ 'Content-Type': 'application/json'
+ }
+
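+        # only the first text is embedded; the endpoint is called with a single input per request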
+ data = {
+ 'model': model,
+ 'input': texts[0],
+ 'input_type': 'query'
+ }
+
+ try:
+ response = post(url, headers=headers, data=dumps(data))
+ except Exception as e:
+ raise InvokeConnectionError(str(e))
+
+ if response.status_code != 200:
+ try:
+ resp = response.json()
+ msg = resp['detail']
+ if response.status_code == 401:
+ raise InvokeAuthorizationError(msg)
+ elif response.status_code == 429:
+ raise InvokeRateLimitError(msg)
+ elif response.status_code == 500:
+ raise InvokeServerUnavailableError(msg)
+ else:
+ raise InvokeError(msg)
+ except JSONDecodeError as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ try:
+ resp = response.json()
+ embeddings = resp['data']
+ usage = resp['usage']
+ except Exception as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])
+
+ result = TextEmbeddingResult(
+ model=model,
+ embeddings=[[
+ float(data) for data in x['embedding']
+ ] for x in embeddings],
+ usage=usage
+ )
+
+ return result
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ num_tokens = 0
+ for text in texts:
+            # use the GPT-2 tokenizer to estimate the number of tokens
+ num_tokens += self._get_num_tokens_by_gpt2(text)
+ return num_tokens
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(model=model, credentials=credentials, texts=['ping'])
+ except InvokeAuthorizationError:
+ raise CredentialsValidateFailedError('Invalid api key')
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ return {
+ InvokeConnectionError: [
+ InvokeConnectionError
+ ],
+ InvokeServerUnavailableError: [
+ InvokeServerUnavailableError
+ ],
+ InvokeRateLimitError: [
+ InvokeRateLimitError
+ ],
+ InvokeAuthorizationError: [
+ InvokeAuthorizationError
+ ],
+ InvokeBadRequestError: [
+ KeyError
+ ]
+ }
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
index 4d4148aa91..429c646b77 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-0205.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 4096
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
- max: 4096
- - name: temperature
- use_template: temperature
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.0025'
output: '0.0025'
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
index 4fbe84e9b7..d0e181d007 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-34b-chat-200k.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 200000
parameter_rules:
- - name: max_tokens
- use_template: max_tokens
- type: int
- default: 1024
- min: 1
- max: 200000
- name: temperature
use_template: temperature
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.6
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 4096
+ min: 1
+ max: 199950
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.012'
output: '0.012'
diff --git a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
index 6195051f16..a6abcc401f 100644
--- a/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/yi/llm/yi-vl-plus.yaml
@@ -9,18 +9,33 @@ model_properties:
mode: chat
context_size: 4096
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+      en_US: Controls the diversity and randomness of the generated output. Lower values give more focused results; higher values give more varied results.
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
- max: 4096
- - name: temperature
- use_template: temperature
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
type: float
- default: 0.7
- min: 0
- max: 2
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+      en_US: Controls the randomness of the generated output. Lower values make it less random; higher values make it more random. In general, adjust either top_p or temperature, not both.
pricing:
input: '0.01'
output: '0.03'
diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py
index 3f0467ee24..5fbc319fd6 100644
--- a/api/core/rag/index_processor/processor/paragraph_index_processor.py
+++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py
@@ -45,11 +45,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
# delete Spliter character
page_content = document_node.page_content
if page_content.startswith(".") or page_content.startswith("。"):
- page_content = page_content[1:]
+ page_content = page_content[1:].strip()
else:
page_content = page_content
- document_node.page_content = page_content
- split_documents.append(document_node)
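+                    # drop nodes that end up empty once the leading separator is stripped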
+ if len(page_content) > 0:
+ document_node.page_content = page_content
+ split_documents.append(document_node)
all_documents.extend(split_documents)
return all_documents
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index f066582ac8..d39a719655 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -2,7 +2,7 @@ version: '3.1'
services:
# API service
api:
- image: langgenius/dify-api:0.5.9
+ image: langgenius/dify-api:0.5.10
restart: always
environment:
# Startup mode, 'api' starts the API server.
@@ -138,7 +138,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
- image: langgenius/dify-api:0.5.9
+ image: langgenius/dify-api:0.5.10
restart: always
environment:
# Startup mode, 'worker' starts the Celery worker for processing the queue.
@@ -209,7 +209,7 @@ services:
# Frontend web application.
web:
- image: langgenius/dify-web:0.5.9
+ image: langgenius/dify-web:0.5.10
restart: always
environment:
EDITION: SELF_HOSTED
diff --git a/web/package.json b/web/package.json
index fc466f42b3..513efdc657 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
{
"name": "dify-web",
- "version": "0.5.9",
+ "version": "0.5.10",
"private": true,
"scripts": {
"dev": "next dev",