diff --git a/api/.env.example b/api/.env.example
index 68c54d7011..bbcb7cf1ec 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -137,6 +137,7 @@ SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
 
 BATCH_UPLOAD_LIMIT=10
+KEYWORD_DATA_SOURCE_TYPE=database
 
 # CODE EXECUTION CONFIGURATION
 CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194
diff --git a/api/config.py b/api/config.py
index dd6f359e6a..a55f9560a4 100644
--- a/api/config.py
+++ b/api/config.py
@@ -67,6 +67,7 @@ DEFAULTS = {
     'CODE_EXECUTION_ENDPOINT': '',
     'CODE_EXECUTION_API_KEY': '',
     'TOOL_ICON_CACHE_MAX_AGE': 3600,
+    'KEYWORD_DATA_SOURCE_TYPE': 'database',
 }
 
 
@@ -97,7 +98,7 @@ class Config:
         # ------------------------
         # General Configurations.
         # ------------------------
-        self.CURRENT_VERSION = "0.5.11"
+        self.CURRENT_VERSION = "0.5.11-fix1"
         self.COMMIT_SHA = get_env('COMMIT_SHA')
         self.EDITION = "SELF_HOSTED"
         self.DEPLOY_ENV = get_env('DEPLOY_ENV')
@@ -316,6 +317,7 @@ class Config:
         self.API_COMPRESSION_ENABLED = get_bool_env('API_COMPRESSION_ENABLED')
 
         self.TOOL_ICON_CACHE_MAX_AGE = get_env('TOOL_ICON_CACHE_MAX_AGE')
+        self.KEYWORD_DATA_SOURCE_TYPE = get_env('KEYWORD_DATA_SOURCE_TYPE')
 
 
 class CloudEditionConfig(Config):
diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py
index 344ef7babe..46478ec131 100644
--- a/api/core/rag/datasource/keyword/jieba/jieba.py
+++ b/api/core/rag/datasource/keyword/jieba/jieba.py
@@ -2,6 +2,7 @@ import json
 from collections import defaultdict
 from typing import Any, Optional
 
+from flask import current_app
 from pydantic import BaseModel
 
 from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
@@ -9,6 +10,7 @@ from core.rag.datasource.keyword.keyword_base import BaseKeyword
 from core.rag.models.document import Document
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
+from extensions.ext_storage import storage
 from models.dataset import Dataset, DatasetKeywordTable, DocumentSegment
 
 
@@ -108,6 +110,9 @@ class Jieba(BaseKeyword):
             if dataset_keyword_table:
                 db.session.delete(dataset_keyword_table)
                 db.session.commit()
+                if dataset_keyword_table.data_source_type != 'database':
+                    file_key = 'keyword_files/' + self.dataset.tenant_id + '/' + self.dataset.id + '.txt'
+                    storage.delete(file_key)
 
     def _save_dataset_keyword_table(self, keyword_table):
         keyword_table_dict = {
@@ -118,20 +123,34 @@ class Jieba(BaseKeyword):
                 "table": keyword_table
             }
         }
-        self.dataset.dataset_keyword_table.keyword_table = json.dumps(keyword_table_dict, cls=SetEncoder)
-        db.session.commit()
+        dataset_keyword_table = self.dataset.dataset_keyword_table
+        keyword_data_source_type = dataset_keyword_table.data_source_type
+        if keyword_data_source_type == 'database':
+            dataset_keyword_table.keyword_table = json.dumps(keyword_table_dict, cls=SetEncoder)
+            db.session.commit()
+        else:
+            file_key = 'keyword_files/' + self.dataset.tenant_id + '/' + self.dataset.id + '.txt'
+            if storage.exists(file_key):
+                storage.delete(file_key)
+            storage.save(file_key, json.dumps(keyword_table_dict, cls=SetEncoder).encode('utf-8'))
 
     def _get_dataset_keyword_table(self) -> Optional[dict]:
         lock_name = 'keyword_indexing_lock_{}'.format(self.dataset.id)
         with redis_client.lock(lock_name, timeout=20):
             dataset_keyword_table = self.dataset.dataset_keyword_table
             if dataset_keyword_table:
-                if dataset_keyword_table.keyword_table_dict:
-                    return dataset_keyword_table.keyword_table_dict['__data__']['table']
+                keyword_table_dict = dataset_keyword_table.keyword_table_dict
+                if keyword_table_dict:
+                    return keyword_table_dict['__data__']['table']
             else:
+                keyword_data_source_type = current_app.config['KEYWORD_DATA_SOURCE_TYPE']
                 dataset_keyword_table = DatasetKeywordTable(
                     dataset_id=self.dataset.id,
-                    keyword_table=json.dumps({
+                    keyword_table='',
+                    data_source_type=keyword_data_source_type,
+                )
+                if keyword_data_source_type == 'database':
+                    dataset_keyword_table.keyword_table = json.dumps({
                         '__type__': 'keyword_table',
                         '__data__': {
                             "index_id": self.dataset.id,
@@ -139,7 +158,6 @@ class Jieba(BaseKeyword):
                             "table": {}
                         }
                     }, cls=SetEncoder)
-                )
                 db.session.add(dataset_keyword_table)
                 db.session.commit()
 
diff --git a/api/core/tools/provider/_position.yaml b/api/core/tools/provider/_position.yaml
index a69f37618e..7eb40b2ab8 100644
--- a/api/core/tools/provider/_position.yaml
+++ b/api/core/tools/provider/_position.yaml
@@ -25,3 +25,4 @@
 - wecom
 - qrcode
 - dingtalk
+- feishu
diff --git a/api/core/tools/provider/builtin/feishu/_assets/icon.svg b/api/core/tools/provider/builtin/feishu/_assets/icon.svg
new file mode 100644
index 0000000000..bf3c202abf
--- /dev/null
+++ b/api/core/tools/provider/builtin/feishu/_assets/icon.svg
@@ -0,0 +1 @@
+
diff --git a/api/core/tools/provider/builtin/feishu/feishu.py b/api/core/tools/provider/builtin/feishu/feishu.py
new file mode 100644
index 0000000000..13303dbe64
--- /dev/null
+++ b/api/core/tools/provider/builtin/feishu/feishu.py
@@ -0,0 +1,8 @@
+from core.tools.provider.builtin.feishu.tools.feishu_group_bot import FeishuGroupBotTool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class FeishuProvider(BuiltinToolProviderController):
+    def _validate_credentials(self, credentials: dict) -> None:
+        FeishuGroupBotTool()
+        pass
diff --git a/api/core/tools/provider/builtin/feishu/feishu.yaml b/api/core/tools/provider/builtin/feishu/feishu.yaml
new file mode 100644
index 0000000000..a1fcd38047
--- /dev/null
+++ b/api/core/tools/provider/builtin/feishu/feishu.yaml
@@ -0,0 +1,13 @@
+identity:
+  author: Arkii Sun
+  name: feishu
+  label:
+    en_US: Feishu
+    zh_Hans: 飞书
+    pt_BR: Feishu
+  description:
+    en_US: Feishu group bot
+    zh_Hans: 飞书群机器人
+    pt_BR: Feishu group bot
+  icon: icon.svg
+credentials_for_provider:
diff --git a/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.py b/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.py
new file mode 100644
index 0000000000..e8ab02f55e
--- /dev/null
+++ b/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.py
@@ -0,0 +1,50 @@
+from typing import Any, Union
+
+import httpx
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+from core.tools.utils.uuid_utils import is_valid_uuid
+
+
+class FeishuGroupBotTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+            invoke tools
+            API document: https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot
+        """
+
+        url = "https://open.feishu.cn/open-apis/bot/v2/hook"
+
+        content = tool_parameters.get('content', '')
+        if not content:
+            return self.create_text_message('Invalid parameter content')
+
+        hook_key = tool_parameters.get('hook_key', '')
+        if not is_valid_uuid(hook_key):
+            return self.create_text_message(
+                f'Invalid parameter hook_key ${hook_key}, not a valid UUID')
+
+        msg_type = 'text'
+        api_url = f'{url}/{hook_key}'
+        headers = {
+            'Content-Type': 'application/json',
+        }
+        params = {}
+        payload = {
+            "msg_type": msg_type,
+            "content": {
+                "text": content,
+            }
+        }
+
+        try:
+            res = httpx.post(api_url, headers=headers, params=params, json=payload)
+            if res.is_success:
+                return self.create_text_message("Text message sent successfully")
+            else:
+                return self.create_text_message(
+                    f"Failed to send the text message, status code: {res.status_code}, response: {res.text}")
+        except Exception as e:
+            return self.create_text_message("Failed to send message to group chat bot. {}".format(e))
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.yaml b/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.yaml
new file mode 100644
index 0000000000..6c3f084e4d
--- /dev/null
+++ b/api/core/tools/provider/builtin/feishu/tools/feishu_group_bot.yaml
@@ -0,0 +1,40 @@
+identity:
+  name: feishu_group_bot
+  author: Arkii Sun
+  label:
+    en_US: Send Group Message
+    zh_Hans: 发送群消息
+    pt_BR: Send Group Message
+  icon: icon.png
+description:
+  human:
+    en_US: Sending a group message on Feishu via the webhook of group bot
+    zh_Hans: 通过飞书的群机器人webhook发送群消息
+    pt_BR: Sending a group message on Feishu via the webhook of group bot
+  llm: A tool for sending messages to a chat group on Feishu(飞书) .
+parameters:
+  - name: hook_key
+    type: secret-input
+    required: true
+    label:
+      en_US: Feishu Group bot webhook key
+      zh_Hans: 群机器人webhook的key
+      pt_BR: Feishu Group bot webhook key
+    human_description:
+      en_US: Feishu Group bot webhook key
+      zh_Hans: 群机器人webhook的key
+      pt_BR: Feishu Group bot webhook key
+    form: form
+  - name: content
+    type: string
+    required: true
+    label:
+      en_US: content
+      zh_Hans: 消息内容
+      pt_BR: content
+    human_description:
+      en_US: Content to sent to the group.
+      zh_Hans: 群消息文本
+      pt_BR: Content to sent to the group.
+    llm_description: Content of the message
+    form: llm
diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py
index 3a8e314d92..943cf4f58d 100644
--- a/api/extensions/ext_storage.py
+++ b/api/extensions/ext_storage.py
@@ -172,6 +172,20 @@ class Storage:
 
             return os.path.exists(filename)
 
+    def delete(self, filename):
+        if self.storage_type == 's3':
+            self.client.delete_object(Bucket=self.bucket_name, Key=filename)
+        elif self.storage_type == 'azure-blob':
+            blob_container = self.client.get_container_client(container=self.bucket_name)
+            blob_container.delete_blob(filename)
+        else:
+            if not self.folder or self.folder.endswith('/'):
+                filename = self.folder + filename
+            else:
+                filename = self.folder + '/' + filename
+            if os.path.exists(filename):
+                os.remove(filename)
+
 
 storage = Storage()
 
diff --git a/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py b/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py
new file mode 100644
index 0000000000..7707148489
--- /dev/null
+++ b/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py
@@ -0,0 +1,33 @@
+"""add-keyworg-table-storage-type
+
+Revision ID: 17b5ab037c40
+Revises: a8f9b3c45e4a
+Create Date: 2024-04-01 09:48:54.232201
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = '17b5ab037c40'
+down_revision = 'a8f9b3c45e4a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+
+    with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('data_source_type', sa.String(length=255), server_default=sa.text("'database'::character varying"), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+
+    with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op:
+        batch_op.drop_column('data_source_type')
+
+    # ### end Alembic commands ###
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 031bbe4dc7..f90fc9abb7 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import pickle
 from json import JSONDecodeError
 
@@ -6,6 +7,7 @@ from sqlalchemy import func
 from sqlalchemy.dialects.postgresql import JSONB, UUID
 
 from extensions.ext_database import db
+from extensions.ext_storage import storage
 from models.account import Account
 from models.model import App, UploadFile
 
@@ -441,6 +443,7 @@ class DatasetKeywordTable(db.Model):
     id = db.Column(UUID, primary_key=True, server_default=db.text('uuid_generate_v4()'))
     dataset_id = db.Column(UUID, nullable=False, unique=True)
     keyword_table = db.Column(db.Text, nullable=False)
+    data_source_type = db.Column(db.String(255), nullable=False, server_default=db.text("'database'::character varying"))
 
     @property
     def keyword_table_dict(self):
@@ -454,8 +457,24 @@ class DatasetKeywordTable(db.Model):
                         if isinstance(node_idxs, list):
                             dct[keyword] = set(node_idxs)
                 return dct
-
-        return json.loads(self.keyword_table, cls=SetDecoder) if self.keyword_table else None
+        # get dataset
+        dataset = Dataset.query.filter_by(
+            id=self.dataset_id
+        ).first()
+        if not dataset:
+            return None
+        if self.data_source_type == 'database':
+            return json.loads(self.keyword_table, cls=SetDecoder) if self.keyword_table else None
+        else:
+            file_key = 'keyword_files/' + dataset.tenant_id + '/' + self.dataset_id + '.txt'
+            try:
+                keyword_table_text = storage.load_once(file_key)
+                if keyword_table_text:
+                    return json.loads(keyword_table_text.decode('utf-8'), cls=SetDecoder)
+                return None
+            except Exception as e:
+                logging.exception(str(e))
+                return None
 
 
 class Embedding(db.Model):
diff --git a/web/package.json b/web/package.json
index a40d71115a..f5fbb7687c 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "dify-web",
-  "version": "0.5.11",
+  "version": "0.5.11-fix1",
   "private": true,
   "scripts": {
     "dev": "next dev",
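
Note: the short sketch below is not part of the patch; it only illustrates, with made-up placeholder IDs, the storage convention the diff introduces. With KEYWORD_DATA_SOURCE_TYPE left at its default of database, the serialized keyword table stays in the dataset_keyword_tables.keyword_table column; with any other value, jieba.py writes the same JSON payload to object storage under a per-dataset key and DatasetKeywordTable.keyword_table_dict reads it back via storage.load_once().

import json

# Placeholder identifiers -- real values are the tenant and dataset UUIDs.
tenant_id = "11111111-2222-3333-4444-555555555555"
dataset_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"

# Object-storage key used by jieba.py and models/dataset.py
# when data_source_type != 'database'.
file_key = 'keyword_files/' + tenant_id + '/' + dataset_id + '.txt'

# Shape of the JSON blob stored under that key (here, an empty keyword table).
payload = {
    '__type__': 'keyword_table',
    '__data__': {'index_id': dataset_id, 'summary': None, 'table': {}},
}

print(file_key)
print(json.dumps(payload))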