Merge branch 'main' into feat/new-login

* main: (77 commits)
  feat: add voyage ai as a new model provider (#8747)
  docs: add english versions for the files customizable_model_scale_out and predefined_model_scale_out (#8871)
  fix: #8843 event: tts_message_end always return in api streaming resp… (#8846)
  Add Jamba and Llama3.2 model support (#8878)
  fix(workflow): update tagging logic in GitHub Actions (#8882)
  chore: bump ruff to 0.6.8 for fixing violation in SIM910 (#8869)
  refactor: update Callback to an abstract class (#8868)
  feat: deprecate gte-Qwen2-7B-instruct embedding model (#8866)
  feat: add internlm2.5-20b and qwen2.5-coder-7b model (#8862)
  fix: customize model credentials were invalid despite the provider credentials being active (#8864)
  fix: update qwen2.5-coder-7b model name (#8861)
  fix(workflow/nodes/knowledge-retrieval/use-config): Preserve rerankin… (#8842)
  chore: fix wrong VectorType match case (#8857)
  feat: add min-connection and max-connection for pgvector (#8841)
  feat(Tools): add feishu tools (#8800)
  fix: delete harm catalog settings for gemini (#8829)
  Add Llama3.2 models in Groq provider (#8831)
  feat: deprecate mistral model for siliconflow (#8828)
  fix: AnalyticdbVector retrieval scores (#8803)
  fix: close log status option raise error (#8826)
  ...
This commit is contained in:
Joe 2024-09-29 17:15:29 +08:00
commit cd88f27cd5
381 changed files with 8346 additions and 1055 deletions

View File

@ -125,7 +125,7 @@ jobs:
with:
images: ${{ env[matrix.image_name_env] }}
tags: |
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
type=ref,event=branch
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}

46 .github/workflows/web-tests.yml vendored Normal file
View File

@ -0,0 +1,46 @@
name: Web Tests
on:
pull_request:
branches:
- main
paths:
- web/**
concurrency:
group: web-tests-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
test:
name: Web Tests
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./web
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: web/**
- name: Setup Node.js
uses: actions/setup-node@v4
if: steps.changed-files.outputs.any_changed == 'true'
with:
node-version: 20
cache: yarn
cache-dependency-path: ./web/package.json
- name: Install dependencies
if: steps.changed-files.outputs.any_changed == 'true'
run: yarn install --frozen-lockfile
- name: Run tests
if: steps.changed-files.outputs.any_changed == 'true'
run: yarn test

View File

@ -162,6 +162,8 @@ PGVECTOR_PORT=5433
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=postgres
PGVECTOR_DATABASE=postgres
PGVECTOR_MIN_CONNECTION=1
PGVECTOR_MAX_CONNECTION=5
# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com

View File

@ -53,11 +53,9 @@ from services.account_service import AccountService
warnings.simplefilter("ignore", ResourceWarning)
# fix windows platform
if os.name == "nt":
os.system('tzutil /s "UTC"')
else:
os.environ["TZ"] = "UTC"
os.environ["TZ"] = "UTC"
# windows platform not support tzset
if hasattr(time, "tzset"):
time.tzset()

View File

@ -652,7 +652,7 @@ where sites.id is null limit 1000"""
app_was_created.send(app, account=account)
except Exception as e:
failed_app_ids.append(app_id)
click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
logging.exception(f"Fix app related site missing issue failed, error: {e}")
continue

View File

@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings):
description="Name of the PostgreSQL database to connect to",
default=None,
)
PGVECTOR_MIN_CONNECTION: PositiveInt = Field(
description="Min connection of the PostgreSQL database",
default=1,
)
PGVECTOR_MAX_CONNECTION: PositiveInt = Field(
description="Max connection of the PostgreSQL database",
default=5,
)

View File

@ -563,10 +563,10 @@ class DatasetRetrievalSettingApi(Resource):
case (
VectorType.MILVUS
| VectorType.RELYT
| VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
| VectorType.TENCENT
| VectorType.PGVECTO_RS
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (
@ -577,6 +577,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.ELASTICSEARCH
| VectorType.PGVECTOR
):
return {
"retrieval_method": [

View File

@ -231,7 +231,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
except Exception as e:
logger.error(e)
break
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if tts_publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
def _process_stream_response(
self,

View File

@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
:return:
"""
# show_retrieve_source
updated_resources = []
if "retriever_resources" in metadata:
metadata["retriever_resources"] = []
for resource in metadata["retriever_resources"]:
metadata["retriever_resources"].append(
updated_resources.append(
{
"segment_id": resource["segment_id"],
"position": resource["position"],
@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
"content": resource["content"],
}
)
metadata["retriever_resources"] = updated_resources
# show annotation reply
if "annotation_reply" in metadata:

View File

@ -309,7 +309,7 @@ class AppRunner:
if not prompt_messages:
prompt_messages = result.prompt_messages
if not usage and result.delta.usage:
if result.delta.usage:
usage = result.delta.usage
if not usage:

View File

@ -212,7 +212,8 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
except Exception as e:
logger.error(e)
break
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if tts_publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
def _process_stream_response(
self,

View File

@ -248,7 +248,8 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan
else:
start_listener_time = time.time()
yield MessageAudioStreamResponse(audio=audio.audio, task_id=task_id)
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
def _process_stream_response(
self, publisher: AppGeneratorTTSPublisher, trace_manager: Optional[TraceQueueManager] = None

View File

@ -5,6 +5,7 @@ from typing import Optional, cast
import numpy as np
from sqlalchemy.exc import IntegrityError
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks]
embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
)
for vector in embedding_result.embeddings:
try:
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try:
embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
)
embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()

View File

@ -0,0 +1,10 @@
from enum import Enum
class EmbeddingInputType(Enum):
"""
Enum for embedding input type.
"""
DOCUMENT = "document"
QUERY = "query"

View File

@ -119,7 +119,7 @@ class ProviderConfiguration(BaseModel):
credentials = model_configuration.credentials
break
if self.custom_configuration.provider:
if not credentials and self.custom_configuration.provider:
credentials = self.custom_configuration.provider.credentials
return credentials

View File

@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
"(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)

View File

@ -3,6 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast
from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
@ -158,12 +159,15 @@ class ModelInstance:
tools=tools,
)
def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
def invoke_text_embedding(
self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
) -> TextEmbeddingResult:
"""
Invoke large language model
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
@ -176,6 +180,7 @@ class ModelInstance:
credentials=self.credentials,
texts=texts,
user=user,
input_type=input_type,
)
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:

View File

@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
from typing import Optional
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
@ -13,7 +14,7 @@ _TEXT_COLOR_MAPPING = {
}
class Callback:
class Callback(ABC):
"""
Base class for callbacks.
Only for LLM.
@ -21,6 +22,7 @@ class Callback:
raise_error: bool = False
@abstractmethod
def on_before_invoke(
self,
llm_instance: AIModel,
@ -48,6 +50,7 @@ class Callback:
"""
raise NotImplementedError()
@abstractmethod
def on_new_chunk(
self,
llm_instance: AIModel,
@ -77,6 +80,7 @@ class Callback:
"""
raise NotImplementedError()
@abstractmethod
def on_after_invoke(
self,
llm_instance: AIModel,
@ -106,6 +110,7 @@ class Callback:
"""
raise NotImplementedError()
@abstractmethod
def on_invoke_error(
self,
llm_instance: AIModel,

View File

@ -0,0 +1,310 @@
## Custom Model Integration
### Introduction
After completing the vendor integration, the next step is to connect the vendor's models. To illustrate the entire connection process, we will use Xinference as an example to demonstrate a complete vendor integration.
It is important to note that for custom models, each model connection requires a complete vendor credential.
Unlike pre-defined models, a custom vendor integration always includes the following two parameters, which do not need to be defined in the vendor YAML file.
![](images/index/image-3.png)
As mentioned earlier, vendors do not need to implement `validate_provider_credential`. The runtime will automatically call the corresponding model layer's `validate_credentials` to validate the credentials, based on the model type and name selected by the user.
### Writing the Vendor YAML
First, we need to identify the types of models supported by the vendor we are integrating.
Currently supported model types are as follows:
- `llm` Text Generation Models
- `text_embedding` Text Embedding Models
- `rerank` Rerank Models
- `speech2text` Speech-to-Text
- `tts` Text-to-Speech
- `moderation` Moderation
Xinference supports LLM, Text Embedding, and Rerank models, so we will start by writing `xinference.yaml`.
```yaml
provider: xinference #Define the vendor identifier
label: # Vendor display name, supports both en_US (English) and zh_Hans (Simplified Chinese). If zh_Hans is not set, it will use en_US by default.
en_US: Xorbits Inference
icon_small: # Small icon, refer to other vendors' icons stored in the _assets directory within the vendor implementation directory; follows the same language policy as the label
en_US: icon_s_en.svg
icon_large: # Large icon
en_US: icon_l_en.svg
help: # Help information
title:
en_US: How to deploy Xinference
zh_Hans: 如何部署 Xinference
url:
en_US: https://github.com/xorbitsai/inference
supported_model_types: # Supported model types. Xinference supports LLM, Text Embedding, and Rerank
- llm
- text-embedding
- rerank
configurate_methods: # Since Xinference is a locally deployed vendor with no predefined models, users need to deploy whatever models they need according to Xinference documentation. Thus, it only supports custom models.
- customizable-model
provider_credential_schema:
credential_form_schemas:
```
Then, we need to determine what credentials are required to define a model in Xinference.
- Since Xinference supports three different model types, we need to specify a `model_type` field to indicate which type a given model is. Here is how we can define it:
```yaml
provider_credential_schema:
credential_form_schemas:
- variable: model_type
type: select
label:
en_US: Model type
zh_Hans: 模型类型
required: true
options:
- value: text-generation
label:
en_US: Language Model
zh_Hans: 语言模型
- value: embeddings
label:
en_US: Text Embedding
- value: reranking
label:
en_US: Rerank
```
- Next, each model has its own model_name, so we need to define that here:
```yaml
- variable: model_name
type: text-input
label:
en_US: Model name
zh_Hans: 模型名称
required: true
placeholder:
zh_Hans: 填写模型名称
en_US: Input model name
```
- Specify the Xinference local deployment address:
```yaml
- variable: server_url
label:
zh_Hans: 服务器URL
en_US: Server url
type: text-input
required: true
placeholder:
zh_Hans: 在此输入Xinference的服务器地址如 https://example.com/xxx
en_US: Enter the url of your Xinference, for example https://example.com/xxx
```
- Each model has a unique model_uid, so we also need to define that here:
```yaml
- variable: model_uid
label:
zh_Hans: 模型UID
en_US: Model uid
type: text-input
required: true
placeholder:
zh_Hans: 在此输入您的Model UID
en_US: Enter the model uid
```
Now, we have completed the basic definition of the vendor.
### Writing the Model Code
Next, let's take the `llm` type as an example and write `xinference.llm.llm.py`.
In `llm.py`, create a Xinference LLM class named `XinferenceAILargeLanguageModel` (the name is arbitrary), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:
- LLM Invocation
Implement the core method for LLM invocation, supporting both stream and synchronous responses.
```python
def _invoke(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) \
-> Union[LLMResult, Generator]:
"""
Invoke large language model
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param model_parameters: model parameters
:param tools: tools for tool usage
:param stop: stop words
:param stream: is the response a stream
:param user: unique user id
:return: full response or stream response chunk generator result
"""
```
When implementing, be sure to use two separate functions to return data for synchronous and stream responses. This is important because Python treats any function containing the `yield` keyword as a generator function, fixing its return type to `Generator`. Here's an example (note that it uses simplified parameters; in a real implementation, use the parameter list defined above):
```python
def _invoke(self, stream: bool, **kwargs) \
-> Union[LLMResult, Generator]:
if stream:
return self._handle_stream_response(**kwargs)
return self._handle_sync_response(**kwargs)
def _handle_stream_response(self, **kwargs) -> Generator:
for chunk in response:
yield chunk
def _handle_sync_response(self, **kwargs) -> LLMResult:
return LLMResult(**response)
```
- Pre-compute Input Tokens
If the model does not provide an interface for pre-computing tokens, you can return 0 directly.
```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param tools: tools for tool usage
:return: token count
"""
```
Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
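For example, a minimal sketch of `get_num_tokens` built on that helper might look like the following (it only counts plain-string message content, which is an assumption for illustration; multimodal content would need extra handling):
```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                   tools: Optional[list[PromptMessageTool]] = None) -> int:
    # Approximate the prompt size with the GPT2 tokenizer helper provided by AIModel.
    # This is only an estimate; the backing model may tokenize differently.
    return sum(
        self._get_num_tokens_by_gpt2(message.content)
        for message in prompt_messages
        if isinstance(message.content, str)
    )
```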
- Model Credentials Validation
Similar to vendor credentials validation, this method validates individual model credentials.
```python
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return: None
"""
```
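A common pattern, sketched below, is to issue a minimal invocation and surface any failure as `CredentialsValidateFailedError`. The `"ping"` prompt and the `max_tokens` value are illustrative placeholders, and `UserPromptMessage` / `CredentialsValidateFailedError` are assumed to be imported from the model runtime:
```python
def validate_credentials(self, model: str, credentials: dict) -> None:
    try:
        # Issue a tiny request; any error means the credentials are unusable.
        self._invoke(
            model=model,
            credentials=credentials,
            prompt_messages=[UserPromptMessage(content="ping")],
            model_parameters={"max_tokens": 5},
            stream=False,
        )
    except Exception as ex:
        raise CredentialsValidateFailedError(str(ex))
```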
- Model Parameter Schema
Unlike predefined models, a custom model's YAML file does not declare which parameters it supports, so we need to generate the model parameter schema dynamically.
For instance, Xinference supports `max_tokens`, `temperature`, and `top_p` parameters.
However, some vendors may support different parameters for different models. For example, the `OpenLLM` vendor supports `top_k`, but not all models provided by this vendor support `top_k`. Let's say model A supports `top_k` but model B does not. In such cases, we need to dynamically generate the model parameter schema, as illustrated below:
```python
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
"""
used to define customizable model schema
"""
rules = [
ParameterRule(
name='temperature', type=ParameterType.FLOAT,
use_template='temperature',
label=I18nObject(
zh_Hans='温度', en_US='Temperature'
)
),
ParameterRule(
name='top_p', type=ParameterType.FLOAT,
use_template='top_p',
label=I18nObject(
zh_Hans='Top P', en_US='Top P'
)
),
ParameterRule(
name='max_tokens', type=ParameterType.INT,
use_template='max_tokens',
min=1,
default=512,
label=I18nObject(
zh_Hans='最大生成长度', en_US='Max Tokens'
)
)
]
# if model is A, add top_k to rules
if model == 'A':
rules.append(
ParameterRule(
name='top_k', type=ParameterType.INT,
use_template='top_k',
min=1,
default=50,
label=I18nObject(
zh_Hans='Top K', en_US='Top K'
)
)
)
"""
some NOT IMPORTANT code here
"""
entity = AIModelEntity(
model=model,
label=I18nObject(
en_US=model
),
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=model_type,
model_properties={
ModelPropertyKey.MODE: ModelType.LLM,
},
parameter_rules=rules
)
return entity
```
- Exception Error Mapping
When a model invocation error occurs, it should be mapped to the runtime's specified `InvokeError` type, enabling Dify to handle different errors appropriately.
Runtime Errors:
- `InvokeConnectionError` Connection error during invocation
- `InvokeServerUnavailableError` Service provider unavailable
- `InvokeRateLimitError` Rate limit reached
- `InvokeAuthorizationError` Authorization failure
- `InvokeBadRequestError` Invalid request parameters
```python
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
```
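As a hedged illustration only, a provider that talks to its backend over HTTP with `requests` might map errors roughly as shown below; the concrete exception classes depend entirely on the client library your implementation actually uses:
```python
import requests

from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)

@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
    # Illustrative mapping for a requests-based client; adjust to the
    # exceptions actually raised by your provider's SDK.
    return {
        InvokeConnectionError: [requests.exceptions.ConnectionError, requests.exceptions.Timeout],
        InvokeServerUnavailableError: [requests.exceptions.HTTPError],
        InvokeRateLimitError: [],
        InvokeAuthorizationError: [],
        InvokeBadRequestError: [ValueError],
    }
```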
For interface method details, see: [Interfaces](./interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).

Binary files not shown (4 image files added: 230 KiB, 205 KiB, 44 KiB, 262 KiB).

View File

@ -0,0 +1,173 @@
## Predefined Model Integration
After completing the vendor integration, the next step is to integrate the models from the vendor.
First, we need to determine the type of model to be integrated and create the corresponding model type `module` under the respective vendor's directory.
Currently supported model types are:
- `llm` Text Generation Model
- `text_embedding` Text Embedding Model
- `rerank` Rerank Model
- `speech2text` Speech-to-Text
- `tts` Text-to-Speech
- `moderation` Moderation
Continuing with `Anthropic` as an example, `Anthropic` only supports LLM, so create a `module` named `llm` under `model_providers.anthropic`.
For predefined models, we first need to create a YAML file named after the model under the `llm` `module`, such as `claude-2.1.yaml`.
### Prepare Model YAML
```yaml
model: claude-2.1 # Model identifier
# Display name of the model, which can be set to en_US English or zh_Hans Chinese. If zh_Hans is not set, it will default to en_US.
# This can also be omitted, in which case the model identifier will be used as the label
label:
en_US: claude-2.1
model_type: llm # Model type, claude-2.1 is an LLM
features: # Supported features, agent-thought supports Agent reasoning, vision supports image understanding
- agent-thought
model_properties: # Model properties
mode: chat # LLM mode, complete for text completion models, chat for conversation models
context_size: 200000 # Maximum context size
parameter_rules: # Parameter rules for the model call; only LLM requires this
- name: temperature # Parameter variable name
# Five default configuration templates are provided: temperature/top_p/max_tokens/presence_penalty/frequency_penalty
# The template variable name can be set directly in use_template, which will use the default configuration in entities.defaults.PARAMETER_RULE_TEMPLATE
# Additional configuration parameters will override the default configuration if set
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label: # Display name of the parameter
zh_Hans: 取样数量
en_US: Top k
type: int # Parameter type, supports float/int/string/boolean
help: # Help information, describing the parameter's function
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false # Whether the parameter is mandatory; can be omitted
- name: max_tokens_to_sample
use_template: max_tokens
default: 4096 # Default value of the parameter
min: 1 # Minimum value of the parameter, applicable to float/int only
max: 4096 # Maximum value of the parameter, applicable to float/int only
pricing: # Pricing information
input: '8.00' # Input unit price, i.e., prompt price
output: '24.00' # Output unit price, i.e., response content price
  unit: '0.000001' # Price unit, i.e., the above prices are per 1M tokens
currency: USD # Price currency
```
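As a quick sanity check of the pricing fields: with `unit: '0.000001'`, the listed prices are multiplied by 0.000001 per token, so a 1,000-token prompt at `input: '8.00'` costs 1,000 × 8.00 × 0.000001 = $0.008, i.e. the listed price corresponds to one million tokens.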
It is recommended to prepare all model configurations before starting the implementation of the model code.
You can also refer to the YAML configuration information under the corresponding model type directories of other vendors in the `model_providers` directory. For the complete YAML rules, refer to: [Schema](schema.md#aimodelentity).
### Implement the Model Call Code
Next, create a Python file named `llm.py` under the `llm` `module` to write the implementation code.
Create an Anthropic LLM class named `AnthropicLargeLanguageModel` (or any other name), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:
- LLM Call
Implement the core method for calling the LLM, supporting both streaming and synchronous responses.
```python
def _invoke(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) \
-> Union[LLMResult, Generator]:
"""
Invoke large language model
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param model_parameters: model parameters
:param tools: tools for tool calling
:param stop: stop words
:param stream: is stream response
:param user: unique user id
:return: full response or stream response chunk generator result
"""
```
Be sure to use two separate functions for returning data, one for synchronous returns and the other for streaming returns, because Python identifies any function containing the `yield` keyword as a generator function, fixing its return type to `Generator`. Thus, synchronous and streaming returns must be implemented separately, as shown below (note that the example uses simplified parameters; for the actual implementation, follow the parameter list above):
```python
def _invoke(self, stream: bool, **kwargs) \
-> Union[LLMResult, Generator]:
if stream:
return self._handle_stream_response(**kwargs)
return self._handle_sync_response(**kwargs)
def _handle_stream_response(self, **kwargs) -> Generator:
for chunk in response:
yield chunk
def _handle_sync_response(self, **kwargs) -> LLMResult:
return LLMResult(**response)
```
- Pre-compute Input Tokens
If the model does not provide an interface to precompute tokens, return 0 directly.
```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param tools: tools for tool calling
:return:
"""
```
- Validate Model Credentials
Similar to vendor credential validation, but specific to a single model.
```python
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
```
- Map Invoke Errors
When a model call fails, map it to a specific `InvokeError` type as required by Runtime, allowing Dify to handle different errors accordingly.
Runtime Errors:
- `InvokeConnectionError` Connection error
- `InvokeServerUnavailableError` Service provider unavailable
- `InvokeRateLimitError` Rate limit reached
- `InvokeAuthorizationError` Authorization failed
- `InvokeBadRequestError` Parameter error
```python
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
```
For interface method explanations, see: [Interfaces](./interfaces.md). For detailed implementation, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).

View File

@ -58,7 +58,7 @@ provider_credential_schema: # Provider credential rules, as Anthropic only supp
en_US: Enter your API URL
```
You can also refer to the YAML configuration information under other provider directories in `model_providers`. The complete YAML rules are available at: [Schema](schema.md#Provider).
You can also refer to the YAML configuration information under other provider directories in `model_providers`. The complete YAML rules are available at: [Schema](schema.md#provider).
### Implementing Provider Code

View File

@ -117,7 +117,7 @@ model_credential_schema:
en_US: Enter your API Base
```
也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#Provider)。
也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#provider)。
#### 实现供应商代码

View File

@ -4,6 +4,7 @@ from typing import Optional
from pydantic import ConfigDict
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
model_config = ConfigDict(protected_namespaces=())
def invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke large language model
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
self.started_at = time.perf_counter()
try:
return self._invoke(model, credentials, texts, user)
return self._invoke(model, credentials, texts, user, input_type)
except Exception as e:
raise self._transform_invoke_error(e)
@abstractmethod
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke large language model
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
raise NotImplementedError

View File

@ -38,3 +38,6 @@
- perfxcloud
- zhinao
- fireworks
- mixedbread
- nomic
- voyage

View File

@ -7,6 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs)

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]

View File

@ -6,6 +6,8 @@
- anthropic.claude-v2:1
- anthropic.claude-3-sonnet-v1:0
- anthropic.claude-3-haiku-v1:0
- ai21.jamba-1-5-large-v1:0
- ai21.jamba-1-5-mini-v1:0
- cohere.command-light-text-v14
- cohere.command-text-v14
- cohere.command-r-plus-v1.0
@ -15,6 +17,10 @@
- meta.llama3-1-405b-instruct-v1:0
- meta.llama3-8b-instruct-v1:0
- meta.llama3-70b-instruct-v1:0
- us.meta.llama3-2-1b-instruct-v1:0
- us.meta.llama3-2-3b-instruct-v1:0
- us.meta.llama3-2-11b-instruct-v1:0
- us.meta.llama3-2-90b-instruct-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-large-2407-v1:0

View File

@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
en_US: Jamba 1.5 Large
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.002'
output: '0.008'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.0002'
output: '0.0004'
unit: '0.001'
currency: USD

View File

@ -63,6 +63,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
{"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "us.meta.llama3-2", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "meta.llama", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "mistral.mistral-7b-instruct", "support_system_prompts": False, "support_tool_use": False},
{"prefix": "mistral.mixtral-8x7b-instruct", "support_system_prompts": False, "support_tool_use": False},
@ -70,6 +71,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
{"prefix": "mistral.mistral-small", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "cohere.command-r", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "amazon.titan", "support_system_prompts": False, "support_tool_use": False},
{"prefix": "ai21.jamba-1-5", "support_system_prompts": True, "support_tool_use": False},
]
@staticmethod

View File

@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
- vision
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00035'
output: '0.00035'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.0001'
output: '0.0001'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00015'
output: '0.00015'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
default: 0.9
min: 0
max: 1
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.002'
output: '0.002'
unit: '0.001'
currency: USD

View File

@ -13,6 +13,7 @@ from botocore.exceptions import (
UnknownServiceError,
)
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
client_config = Config(region_name=credentials["aws_region"])

View File

@ -5,6 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
# get model properties

View File

@ -15,6 +15,7 @@ help:
en_US: https://fireworks.ai/account/api-keys
supported_model_types:
- llm
- text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,12 @@
model: WhereIsAI/UAE-Large-V1
label:
zh_Hans: UAE-Large-V1
en_US: UAE-Large-V1
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-base
label:
zh_Hans: GTE-base
en_US: GTE-base
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-large
label:
zh_Hans: GTE-large
en_US: GTE-large
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1.5
label:
zh_Hans: nomic-embed-text-v1.5
en_US: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1
label:
zh_Hans: nomic-embed-text-v1
en_US: nomic-embed-text-v1
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,151 @@
import time
from collections.abc import Mapping
from typing import Optional, Union
import numpy as np
from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
"""
Model class for Fireworks text embedding model.
"""
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
extra_model_kwargs = {}
if user:
extra_model_kwargs["user"] = user
extra_model_kwargs["encoding_format"] = "float"
context_size = self._get_context_size(model, credentials)
max_chunks = self._get_max_chunks(model, credentials)
inputs = []
indices = []
used_tokens = 0
for i, text in enumerate(texts):
# Here token count is only an approximation based on the GPT2 tokenizer
# TODO: Optimize for better token estimation and chunking
num_tokens = self._get_num_tokens_by_gpt2(text)
if num_tokens >= context_size:
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
# if num tokens is larger than context length, only use the start
inputs.append(text[0:cutoff])
else:
inputs.append(text)
indices += [i]
batched_embeddings = []
_iter = range(0, len(inputs), max_chunks)
for i in _iter:
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
texts=inputs[i : i + max_chunks],
extra_model_kwargs=extra_model_kwargs,
)
used_tokens += embedding_used_tokens
batched_embeddings += embeddings_batch
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
def validate_credentials(self, model: str, credentials: Mapping) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
# call embedding model
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
def _embedding_invoke(
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
) -> tuple[list[list[float]], int]:
"""
Invoke embedding model
:param model: model name
:param client: model client
:param texts: texts to embed
:param extra_model_kwargs: extra model kwargs
:return: embeddings and used tokens
"""
response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
return [data.embedding for data in response.data], response.usage.total_tokens
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
input_price_info = self.get_price(
model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
)
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
en_US: Gemini 1.5 Flash
en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
- agent-thought
@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
en_US: Gemini 1.5 Pro
en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
- agent-thought
@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -27,6 +27,15 @@ parameter_rules:
default: 4096
min: 1
max: 4096
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -31,6 +31,15 @@ parameter_rules:
max: 2048
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'

View File

@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
from google.generativeai import client
from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
from google.generativeai.client import _ClientManager
from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image
@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content)
# Create a new ClientManager with tenant's API key
new_client_manager = client._ClientManager()
new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client
safety_settings = {
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}
response = google_model.generate_content(
contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream,
safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600},
)
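
With the import swap above, the tenant-scoped client is now built directly from _ClientManager rather than the client module, and the hard-coded harm-category safety overrides are dropped. A minimal sketch of that wiring, assuming the google-generativeai package; the model name and API key are placeholders, not part of this diff:

import google.generativeai as genai
from google.generativeai.client import _ClientManager

# Hypothetical illustration: build a per-tenant client so each workspace
# uses its own Google API key, as the change above does.
google_model = genai.GenerativeModel(model_name="gemini-1.5-pro")
client_manager = _ClientManager()
client_manager.configure(api_key="YOUR_GOOGLE_API_KEY")  # placeholder credential
google_model._client = client_manager.make_client("generative")
response = google_model.generate_content("Hello", request_options={"timeout": 600})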

View File

@ -0,0 +1,25 @@
model: llama-3.2-11b-text-preview
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-90b-text-preview
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -6,6 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
execute_model = model

View File

@ -1,6 +1,7 @@
import time
from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]

View File

@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""

View File

@ -1,6 +1,6 @@
provider: jina
label:
en_US: Jina
en_US: Jina AI
description:
en_US: Embedding and Rerank Model Supported
icon_small:
@ -11,7 +11,7 @@ background: "#EFFDFD"
help:
title:
en_US: Get your API key from Jina AI
zh_Hans: 从 Jina 获取 API Key
zh_Hans: 从 Jina AI 获取 API Key
url:
en_US: https://jina.ai/
supported_model_types:

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,8 +28,37 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1"
def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
"""
Build the request payload for the Jina embeddings API
:param model: model name
:param texts: texts to embed
:param credentials: model credentials
:param input_type: input type
:return: request payload dict
"""
def transform_jina_input_text(model, text):
if model == "jina-clip-v1":
return {"text": text}
return text
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
# model specific parameters
if model == "jina-embeddings-v3":
# set `task` type according to input type for the best performance
data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
return data
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -37,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
@ -49,15 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
def transform_jina_input_text(model, text):
if model == "jina-clip-v1":
return {"text": text}
return text
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
if model == "jina-embeddings-v3":
data["task"] = "text-matching"
data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
try:
response = post(url, headers=headers, data=dumps(data))
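
The refactor above moves payload construction into _to_payload, where jina-embeddings-v3 gets a task hint derived from the input type instead of the previous fixed "text-matching" value. A small sketch of that mapping, assuming the EmbeddingInputType enum from the imports; the helper name is illustrative only:

from core.embedding.embedding_constant import EmbeddingInputType

# Hypothetical helper mirroring the logic above: queries and documents get
# different task hints, and other models receive no task field at all.
def jina_task_for(model: str, input_type: EmbeddingInputType):
    if model != "jina-embeddings-v3":
        return None
    return "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"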

View File

@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
class LocalAITextEmbeddingModel(TextEmbeddingModel):
"""
Model class for Jina text embedding model.
Model class for LocalAI text embedding model.
"""
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
if len(texts) != 1:

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.minimax.chat/v1/embeddings"
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
api_key = credentials["minimax_api_key"]

Binary file not shown (added image, 121 KiB).

Binary file not shown (added image, 36 KiB).

View File

@ -0,0 +1,27 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class MixedBreadProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
"""
Validate provider credentials
If validation fails, raise an exception.
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
"""
try:
model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
# Use the `mxbai-embed-large-v1` model to validate the credentials,
model_instance.validate_credentials(model="mxbai-embed-large-v1", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:
logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
raise ex

View File

@ -0,0 +1,31 @@
provider: mixedbread
label:
en_US: MixedBread
description:
en_US: Embedding and Rerank Model Supported
icon_small:
en_US: icon_s_en.png
icon_large:
en_US: icon_l_en.png
background: "#EFFDFD"
help:
title:
en_US: Get your API key from MixedBread AI
zh_Hans: 从 MixedBread 获取 API Key
url:
en_US: https://www.mixedbread.ai/
supported_model_types:
- text-embedding
- rerank
configurate_methods:
- predefined-model
provider_credential_schema:
credential_form_schemas:
- variable: api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key

View File

@ -0,0 +1,4 @@
model: mxbai-rerank-large-v1
model_type: rerank
model_properties:
context_size: 512

View File

@ -0,0 +1,125 @@
from typing import Optional
import httpx
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
class MixedBreadRerankModel(RerankModel):
"""
Model class for MixedBread rerank model.
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n documents to return
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=[])
base_url = credentials.get("base_url", "https://api.mixedbread.ai/v1")
base_url = base_url.removesuffix("/")
try:
response = httpx.post(
base_url + "/reranking",
json={"model": model, "query": query, "input": docs, "top_k": top_n, "return_input": True},
headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
)
response.raise_for_status()
results = response.json()
rerank_documents = []
for result in results["data"]:
rerank_document = RerankDocument(
index=result["index"],
text=result["input"],
score=result["score"],
)
if score_threshold is None or result["score"] >= score_threshold:
rerank_documents.append(rerank_document)
return RerankResult(model=model, docs=rerank_documents)
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(
model=model,
credentials=credentials,
query="What is the capital of the United States?",
docs=[
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
"Census, Carson City had a population of 55,274.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
"are a political division controlled by the United States. Its capital is Saipan.",
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
"""
return {
InvokeConnectionError: [httpx.ConnectError],
InvokeServerUnavailableError: [httpx.RemoteProtocolError],
InvokeRateLimitError: [],
InvokeAuthorizationError: [httpx.HTTPStatusError],
InvokeBadRequestError: [httpx.RequestError],
}
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.RERANK,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
)
return entity
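
For reference, the reranking call issued by _invoke above boils down to one HTTP request. A hypothetical standalone sketch of that request, mirroring the endpoint, body, and headers used in the class; the API key and documents are placeholders:

import httpx

# Hypothetical illustration of the same request the rerank model sends.
response = httpx.post(
    "https://api.mixedbread.ai/v1/reranking",
    json={
        "model": "mxbai-rerank-large-v1",
        "query": "What is the capital of the United States?",
        "input": ["Carson City is the capital of Nevada.", "Saipan is the capital of the Northern Mariana Islands."],
        "top_k": 2,
        "return_input": True,
    },
    headers={"Authorization": "Bearer YOUR_API_KEY", "Content-Type": "application/json"},
)
response.raise_for_status()
for item in response.json()["data"]:
    print(item["index"], item["score"])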

View File

@ -0,0 +1,8 @@
model: mxbai-embed-2d-large-v1
model_type: text-embedding
model_properties:
context_size: 512
pricing:
input: '0.0001'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,8 @@
model: mxbai-embed-large-v1
model_type: text-embedding
model_properties:
context_size: 512
pricing:
input: '0.0001'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,170 @@
import time
from json import JSONDecodeError, dumps
from typing import Optional
import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
"""
Model class for MixedBread text embedding model.
"""
api_base: str = "https://api.mixedbread.ai/v1"
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
if not api_key:
raise CredentialsValidateFailedError("api_key is required")
base_url = credentials.get("base_url", self.api_base)
base_url = base_url.removesuffix("/")
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
data = {"model": model, "input": texts}
try:
response = requests.post(url, headers=headers, data=dumps(data))
except Exception as e:
raise InvokeConnectionError(str(e))
if response.status_code != 200:
try:
resp = response.json()
msg = resp["detail"]
if response.status_code == 401:
raise InvokeAuthorizationError(msg)
elif response.status_code == 429:
raise InvokeRateLimitError(msg)
elif response.status_code == 500:
raise InvokeServerUnavailableError(msg)
else:
raise InvokeBadRequestError(msg)
except JSONDecodeError as e:
raise InvokeServerUnavailableError(
f"Failed to convert response to json: {e} with text: {response.text}"
)
try:
resp = response.json()
embeddings = resp["data"]
usage = resp["usage"]
except Exception as e:
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
result = TextEmbeddingResult(
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
)
return result
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(model=model, credentials=credentials, texts=["ping"])
except Exception as e:
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
}
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
# get input price info
input_price_info = self.get_price(
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
)
# transform usage
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.TEXT_EMBEDDING,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
)
return entity

View File

@ -0,0 +1,13 @@
<svg width="93" height="31" viewBox="0 0 93 31" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M89.6037 29.888C88.9464 29.888 88.3667 29.7302 87.8647 29.4145C87.3626 29.0936 86.9719 28.6407 86.6924 28.0559L87.2979 27.4037C87.5464 27.9109 87.8698 28.3069 88.2684 28.5915C88.6669 28.871 89.1094 29.0108 89.5959 29.0108C89.922 29.0108 90.2196 28.9435 90.4887 28.8089C90.763 28.6744 90.9804 28.4829 91.1408 28.2344C91.3064 27.9808 91.3892 27.6806 91.3892 27.3339C91.3892 27.0182 91.3116 26.7697 91.1563 26.5886C91.0062 26.4074 90.7837 26.2522 90.4887 26.1228C90.1988 25.9882 89.8366 25.8381 89.4018 25.6725C89.0654 25.5379 88.7393 25.3853 88.4236 25.2145C88.1079 25.0437 87.8465 24.8289 87.6395 24.5701C87.4377 24.3061 87.3367 23.9723 87.3367 23.5686C87.3367 23.1598 87.4454 22.7975 87.6628 22.4817C87.8802 22.1609 88.1804 21.9098 88.5634 21.7287C88.9464 21.5424 89.3811 21.4492 89.8676 21.4492C90.3127 21.4492 90.7293 21.545 91.1175 21.7365C91.5109 21.928 91.8628 22.1997 92.1733 22.5516L91.6532 23.2115C91.177 22.5853 90.5844 22.2721 89.8754 22.2721C89.4406 22.2721 89.0861 22.386 88.8118 22.6137C88.5427 22.8415 88.4081 23.1391 88.4081 23.5065C88.4081 23.7705 88.4935 23.9904 88.6643 24.1664C88.8351 24.3424 89.0576 24.4925 89.3319 24.6167C89.6114 24.7409 89.9116 24.8651 90.2325 24.9893C90.6983 25.1653 91.102 25.3413 91.4436 25.5172C91.7903 25.6932 92.0595 25.9183 92.251 26.1927C92.4425 26.4618 92.5382 26.8293 92.5382 27.2951C92.5382 27.8281 92.414 28.2888 92.1656 28.6769C91.9171 29.0651 91.5704 29.3653 91.1253 29.5775C90.6854 29.7845 90.1781 29.888 89.6037 29.888Z" fill="#3C593D"/>
<path d="M79.8324 29.8841C79.0871 29.8841 78.4143 29.7029 77.8139 29.3406C77.2187 28.9732 76.7451 28.4711 76.3932 27.8345C76.0464 27.1979 75.873 26.4708 75.873 25.653C75.873 24.8456 76.0438 24.1262 76.3854 23.4948C76.7322 22.8582 77.2032 22.3562 77.7984 21.9887C78.3987 21.6212 79.0767 21.4375 79.8324 21.4375C80.5518 21.4375 81.2039 21.6057 81.7888 21.9421C82.3736 22.2785 82.8187 22.7443 83.1241 23.3395V21.6859H84.2575V29.6356H83.1241V27.9587C82.7825 28.5591 82.3244 29.0301 81.7499 29.3717C81.1754 29.7133 80.5363 29.8841 79.8324 29.8841ZM80.1119 28.8981C80.7071 28.8981 81.2324 28.761 81.6878 28.4867C82.1485 28.2072 82.5107 27.8242 82.7747 27.3377C83.0387 26.846 83.1706 26.287 83.1706 25.6608C83.1706 25.0294 83.0387 24.4704 82.7747 23.9839C82.5159 23.4974 82.1562 23.117 81.6956 22.8427C81.235 22.5632 80.7071 22.4235 80.1119 22.4235C79.5167 22.4235 78.9888 22.5632 78.5281 22.8427C78.0675 23.117 77.7052 23.4974 77.4413 23.9839C77.1773 24.4704 77.0453 25.0294 77.0453 25.6608C77.0453 26.287 77.1773 26.846 77.4413 27.3377C77.7052 27.8242 78.0675 28.2072 78.5281 28.4867C78.9888 28.761 79.5167 28.8981 80.1119 28.8981Z" fill="#3C593D"/>
<path d="M71.9658 29.6382V16.2852H73.0993V29.6382H71.9658Z" fill="#3C593D"/>
<path d="M68.1539 29.8864C67.5587 29.8864 67.0955 29.6871 66.7643 29.2886C66.4382 28.8849 66.2752 28.3182 66.2752 27.5884V22.5422H65.4678V21.6882H66.2752V18.7148H67.4086V21.6882H69.3883V22.5422H67.4086V27.5263C67.4086 27.9662 67.494 28.3026 67.6648 28.5355C67.8356 28.7684 68.0789 28.8849 68.3946 28.8849C68.6999 28.8849 68.9691 28.7995 69.202 28.6287L69.4892 29.5292C69.3132 29.6379 69.1062 29.7233 68.8681 29.7854C68.6301 29.8527 68.392 29.8864 68.1539 29.8864Z" fill="#3C593D"/>
<path d="M58.513 29.8841C57.7678 29.8841 57.0949 29.7029 56.4946 29.3406C55.8994 28.9732 55.4258 28.4711 55.0739 27.8345C54.7271 27.1979 54.5537 26.4708 54.5537 25.653C54.5537 24.8456 54.7245 24.1262 55.0661 23.4948C55.4129 22.8582 55.8838 22.3562 56.479 21.9887C57.0794 21.6212 57.7574 21.4375 58.513 21.4375C59.2324 21.4375 59.8846 21.6057 60.4694 21.9421C61.0543 22.2785 61.4994 22.7443 61.8047 23.3395V21.6859H62.9382V29.6356H61.8047V27.9587C61.4631 28.5591 61.0051 29.0301 60.4306 29.3717C59.8561 29.7133 59.2169 29.8841 58.513 29.8841ZM58.7925 28.8981C59.3877 28.8981 59.913 28.761 60.3685 28.4867C60.8291 28.2072 61.1914 27.8242 61.4554 27.3377C61.7193 26.846 61.8513 26.287 61.8513 25.6608C61.8513 25.0294 61.7193 24.4704 61.4554 23.9839C61.1966 23.4974 60.8369 23.117 60.3763 22.8427C59.9156 22.5632 59.3877 22.4235 58.7925 22.4235C58.1973 22.4235 57.6694 22.5632 57.2088 22.8427C56.7482 23.117 56.3859 23.4974 56.1219 23.9839C55.858 24.4704 55.726 25.0294 55.726 25.6608C55.726 26.287 55.858 26.846 56.1219 27.3377C56.3859 27.8242 56.7482 28.2072 57.2088 28.4867C57.6694 28.761 58.1973 28.8981 58.7925 28.8981Z" fill="#3C593D"/>
<path d="M5.41228 22.6607V0H6.76535V30.2143H5.41228L1.35307 7.55357V30.2143H0V0H1.35307L5.41228 22.6607Z" fill="#3C593D"/>
<path d="M13.6575 28.9006C14.024 28.9006 14.3341 28.7775 14.5878 28.5312C14.8697 28.2848 15.0106 27.9701 15.0106 27.587V2.62733C15.0106 2.27154 14.8697 1.9705 14.5878 1.72418C14.3341 1.4505 14.024 1.31366 13.6575 1.31366C13.2629 1.31366 12.9387 1.4505 12.685 1.72418C12.4313 1.9705 12.3045 2.27154 12.3045 2.62733V27.587C12.3045 27.9701 12.4313 28.2848 12.685 28.5312C12.9387 28.7775 13.2629 28.9006 13.6575 28.9006ZM13.6575 30.2143C12.8964 30.2143 12.2481 29.968 11.7125 29.4753C11.2051 28.9554 10.9514 28.3259 10.9514 27.587V2.62733C10.9514 1.91576 11.2051 1.29998 11.7125 0.779988C12.2481 0.259996 12.8964 0 13.6575 0C14.3905 0 15.0247 0.259996 15.5603 0.779988C16.0959 1.29998 16.3637 1.91576 16.3637 2.62733V27.587C16.3637 28.3259 16.0959 28.9554 15.5603 29.4753C15.0247 29.968 14.3905 30.2143 13.6575 30.2143Z" fill="#3C593D"/>
<path d="M28.3299 0H29.683V30.2143H28.3299V5.25466L24.9472 18.3913L21.5645 5.25466V30.2143H20.2115V0H21.5645L24.9472 13.1366L28.3299 0Z" fill="#3C593D"/>
<path d="M33.6999 30.2143V0H35.0529V30.2143H33.6999Z" fill="#3C593D"/>
<path d="M41.776 30.2143C41.0149 30.2143 40.3666 29.968 39.831 29.4753C39.3236 28.9554 39.0699 28.3259 39.0699 27.587V2.62733C39.0699 1.91576 39.3236 1.29998 39.831 0.779988C40.3666 0.259996 41.0149 0 41.776 0C42.5089 0 43.1432 0.259996 43.6788 0.779988C44.2143 1.29998 44.4821 1.91576 44.4821 2.62733V5.25466H43.1291V2.62733C43.1291 2.27154 42.9881 1.9705 42.7062 1.72418C42.4525 1.4505 42.1425 1.31366 41.776 1.31366C41.3814 1.31366 41.0572 1.4505 40.8035 1.72418C40.5498 1.9705 40.4229 2.27154 40.4229 2.62733V27.587C40.4229 27.9701 40.5498 28.2848 40.8035 28.5312C41.0572 28.7775 41.3814 28.9006 41.776 28.9006C42.1425 28.9006 42.4525 28.7775 42.7062 28.5312C42.9881 28.2848 43.1291 27.9701 43.1291 27.587V24.9596H44.4821V27.587C44.4821 28.3259 44.2143 28.9554 43.6788 29.4753C43.1432 29.968 42.5089 30.2143 41.776 30.2143Z" fill="#3C593D"/>
<path d="M56 1H91" stroke="#3C593D" stroke-linecap="round" stroke-dasharray="0.1 2"/>
</svg>

(SVG icon added above, 6.4 KiB)

Binary file not shown (added image, 25 KiB).

View File

@ -0,0 +1,28 @@
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
class _CommonNomic:
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
}

View File

@ -0,0 +1,26 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class NomicAtlasProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
"""
Validate provider credentials
If validation fails, raise an exception.
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
"""
try:
model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:
logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
raise ex

View File

@ -0,0 +1,29 @@
provider: nomic
label:
zh_Hans: Nomic Atlas
en_US: Nomic Atlas
icon_small:
en_US: icon_s_en.png
icon_large:
en_US: icon_l_en.svg
background: "#EFF1FE"
help:
title:
en_US: Get your API key from Nomic Atlas
zh_Hans: 从Nomic Atlas获取 API Key
url:
en_US: https://atlas.nomic.ai/data
supported_model_types:
- text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:
credential_form_schemas:
- variable: nomic_api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key

View File

@ -0,0 +1,8 @@
model: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
context_size: 8192
pricing:
input: "0.1"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,8 @@
model: nomic-embed-text-v1
model_type: text-embedding
model_properties:
context_size: 8192
pricing:
input: "0.1"
unit: "0.000001"
currency: USD

Some files were not shown because too many files have changed in this diff.