From 680c1bd41db203ae6a3f2175d27f55a766369a34 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Tue, 24 Sep 2024 21:37:55 +0800 Subject: [PATCH 1/6] remove description --- api/controllers/console/datasets/external.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 05effa6242..a5983c812a 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -23,7 +23,7 @@ def _validate_name(name): def _validate_description_length(description): - if len(description) > 400: + if description and len(description) > 400: raise ValueError("Description cannot exceed 400 characters.") return description @@ -61,13 +61,6 @@ class ExternalApiTemplateListApi(Resource): help="Name is required. Name must be between 1 to 100 characters.", type=_validate_name, ) - parser.add_argument( - "description", - nullable=True, - required=False, - help="Description is required. Description must be between 1 to 400 characters.", - type=_validate_description_length, - ) parser.add_argument( "settings", type=dict, @@ -119,13 +112,6 @@ class ExternalApiTemplateApi(Resource): help="type is required. Name must be between 1 to 100 characters.", type=_validate_name, ) - parser.add_argument( - "description", - nullable=True, - required=False, - help="description is required. Description must be between 1 to 400 characters.", - type=_validate_description_length, - ) parser.add_argument( "settings", type=dict, From a53b4fb2ff835077012fdc436bf900ef5d64f737 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Tue, 24 Sep 2024 22:28:23 +0800 Subject: [PATCH 2/6] remove description --- api/controllers/console/datasets/external.py | 4 ++-- api/models/dataset.py | 14 ++++++++++++++ api/services/external_knowledge_service.py | 6 +++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index a5983c812a..4eb43af332 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -152,8 +152,8 @@ class ExternalApiUseCheckApi(Resource): def get(self, external_knowledge_api_id): external_knowledge_api_id = str(external_knowledge_api_id) - external_api_template_is_using = ExternalDatasetService.external_api_template_use_check(external_knowledge_api_id) - return {"is_using": external_api_template_is_using}, 200 + external_api_template_is_using, count = ExternalDatasetService.external_api_template_use_check(external_knowledge_api_id) + return {"is_using": external_api_template_is_using, "count": count}, 200 class ExternalDatasetInitApi(Resource): diff --git a/api/models/dataset.py b/api/models/dataset.py index ecf2c244e6..f5e8be970c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -723,6 +723,7 @@ class ExternalApiTemplates(db.Model): "name": self.name, "description": self.description, "settings": self.settings_dict, + "dataset_bindings": self.dataset_bindings, "created_by": self.created_by, "created_at": self.created_at.isoformat(), } @@ -733,7 +734,20 @@ class ExternalApiTemplates(db.Model): return json.loads(self.settings) if self.settings else None except JSONDecodeError: return None + + @property + def dataset_bindings(self): + external_knowledge_bindings = db.session.query(ExternalKnowledgeBindings).filter(ExternalKnowledgeBindings.external_api_template_id == self.id).all() + dataset_ids = [binding.dataset_id for binding in external_knowledge_bindings] + datasets = db.session.query(Dataset).filter(Dataset.id.in_(dataset_ids)).all() + dataset_bindings = [] + for dataset in datasets: + dataset_bindings.append({ + "id": dataset.id, + "name": dataset.name + }) + return dataset_bindings class ExternalKnowledgeBindings(db.Model): __tablename__ = "external_knowledge_bindings" diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 8d03493a14..58006ea088 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -89,11 +89,11 @@ class ExternalDatasetService: db.session.commit() @staticmethod - def external_api_template_use_check(external_knowledge_api_id: str) -> bool: + def external_api_template_use_check(external_knowledge_api_id: str) -> tuple[bool, int]: count = ExternalKnowledgeBindings.query.filter_by(external_api_template_id=external_knowledge_api_id).count() if count > 0: - return True - return False + return True, count + return False, 0 @staticmethod def get_external_knowledge_binding_with_dataset_id(tenant_id: str, dataset_id: str) -> ExternalKnowledgeBindings: From a258f8dfdf3adb95b8e18554b5d95bb35b1b2e70 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Tue, 24 Sep 2024 23:32:23 +0800 Subject: [PATCH 3/6] remove description --- api/controllers/console/datasets/datasets.py | 27 ++++ api/controllers/console/datasets/external.py | 4 +- api/core/rag/retrieval/dataset_retrieval.py | 6 +- api/fields/dataset_fields.py | 8 + api/models/dataset.py | 37 ++++- api/services/dataset_service.py | 158 +++++++++++-------- 6 files changed, 160 insertions(+), 80 deletions(-) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index ebc5d31e7e..554a0bc0f9 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -234,6 +234,33 @@ class DatasetApi(Resource): ) parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.") parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.") + + parser.add_argument( + "external_retrieval_model", + type=dict, + required=False, + nullable=True, + location="json", + help="Invalid external retrieval model.", + ) + + parser.add_argument( + "external_knowledge_id", + type=str, + required=False, + nullable=True, + location="json", + help="Invalid external knowledge id.", + ) + + parser.add_argument( + "external_knowledge_api_id", + type=str, + required=False, + nullable=True, + location="json", + help="Invalid external knowledge api id.", + ) args = parser.parse_args() data = request.get_json() diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 4eb43af332..1bc7ffdf49 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -152,7 +152,9 @@ class ExternalApiUseCheckApi(Resource): def get(self, external_knowledge_api_id): external_knowledge_api_id = str(external_knowledge_api_id) - external_api_template_is_using, count = ExternalDatasetService.external_api_template_use_check(external_knowledge_api_id) + external_api_template_is_using, count = ExternalDatasetService.external_api_template_use_check( + external_knowledge_api_id + ) return {"is_using": external_api_template_is_using, "count": count}, 200 diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index c23b52cf55..ae61ba7112 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -112,11 +112,7 @@ class DatasetRetrieval: continue # pass if dataset is not available - if ( - dataset - and dataset.available_document_count == 0 - and dataset.provider != "external" - ): + if dataset and dataset.available_document_count == 0 and dataset.provider != "external": continue available_datasets.append(dataset) diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 9cf8da7acd..7aee087d78 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -41,6 +41,13 @@ dataset_retrieval_model_fields = { tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String} +external_knowledge_info_fields = { + "external_knowledge_id": fields.String, + "external_knowledge_api_id": fields.String, + "external_knowledge_api_name": fields.String, + "external_knowledge_api_endpoint": fields.String, +} + dataset_detail_fields = { "id": fields.String, "name": fields.String, @@ -61,6 +68,7 @@ dataset_detail_fields = { "embedding_available": fields.Boolean, "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields), "tags": fields.List(fields.Nested(tag_fields)), + "external_knowledge_info": fields.Nested(external_knowledge_info_fields), } dataset_query_detail_fields = { diff --git a/api/models/dataset.py b/api/models/dataset.py index f5e8be970c..c61d467956 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -171,6 +171,29 @@ class Dataset(db.Model): return tags or [] + @property + def external_knowledge_info(self): + if self.provider != "external": + return None + external_knowledge_binding = ( + db.session.query(ExternalKnowledgeBindings).filter(ExternalKnowledgeBindings.dataset_id == self.id).first() + ) + if not external_knowledge_binding: + return None + external_api_template = ( + db.session.query(ExternalApiTemplates) + .filter(ExternalApiTemplates.id == external_knowledge_binding.external_api_template_id) + .first() + ) + if not external_api_template: + return None + return { + "external_knowledge_id": external_knowledge_binding.external_knowledge_id, + "external_knowledge_api_id": external_api_template.id, + "external_knowledge_api_name": external_api_template.name, + "external_knowledge_api_endpoint": json.loads(external_api_template.settings).get("endpoint", ""), + } + @staticmethod def gen_collection_name_by_id(dataset_id: str) -> str: normalized_dataset_id = dataset_id.replace("-", "_") @@ -734,21 +757,23 @@ class ExternalApiTemplates(db.Model): return json.loads(self.settings) if self.settings else None except JSONDecodeError: return None - + @property def dataset_bindings(self): - external_knowledge_bindings = db.session.query(ExternalKnowledgeBindings).filter(ExternalKnowledgeBindings.external_api_template_id == self.id).all() + external_knowledge_bindings = ( + db.session.query(ExternalKnowledgeBindings) + .filter(ExternalKnowledgeBindings.external_api_template_id == self.id) + .all() + ) dataset_ids = [binding.dataset_id for binding in external_knowledge_bindings] datasets = db.session.query(Dataset).filter(Dataset.id.in_(dataset_ids)).all() dataset_bindings = [] for dataset in datasets: - dataset_bindings.append({ - "id": dataset.id, - "name": dataset.name - }) + dataset_bindings.append({"id": dataset.id, "name": dataset.name}) return dataset_bindings + class ExternalKnowledgeBindings(db.Model): __tablename__ = "external_knowledge_bindings" __table_args__ = ( diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 699d2d64f4..a3a5d7b84f 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -184,7 +184,7 @@ class DatasetService: return dataset @staticmethod - def get_dataset(dataset_id): + def get_dataset(dataset_id) -> Dataset: return Dataset.query.filter_by(id=dataset_id).first() @staticmethod @@ -225,81 +225,103 @@ class DatasetService: @staticmethod def update_dataset(dataset_id, data, user): - data.pop("partial_member_list", None) - filtered_data = {k: v for k, v in data.items() if v is not None or k == "description"} dataset = DatasetService.get_dataset(dataset_id) + DatasetService.check_dataset_permission(dataset, user) - action = None - if dataset.indexing_technique != data["indexing_technique"]: - # if update indexing_technique - if data["indexing_technique"] == "economy": - action = "remove" - filtered_data["embedding_model"] = None - filtered_data["embedding_model_provider"] = None - filtered_data["collection_binding_id"] = None - elif data["indexing_technique"] == "high_quality": - action = "add" - # get embedding model setting - try: - model_manager = ModelManager() - embedding_model = model_manager.get_model_instance( - tenant_id=current_user.current_tenant_id, - provider=data["embedding_model_provider"], - model_type=ModelType.TEXT_EMBEDDING, - model=data["embedding_model"], - ) - filtered_data["embedding_model"] = embedding_model.model - filtered_data["embedding_model_provider"] = embedding_model.provider - dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( - embedding_model.provider, embedding_model.model - ) - filtered_data["collection_binding_id"] = dataset_collection_binding.id - except LLMBadRequestError: - raise ValueError( - "No Embedding Model available. Please configure a valid provider " - "in the Settings -> Model Provider." - ) - except ProviderTokenNotInitError as ex: - raise ValueError(ex.description) - else: + if dataset.provider == "external": + dataset.retrieval_model = data.get("external_retrieval_model", None) + dataset.name = data.get("name", dataset.name) + dataset.description = data.get("description", "") + external_knowledge_id = data.get("external_knowledge_id", None) + db.session.add(dataset) + if not external_knowledge_id: + raise ValueError("External knowledge id is required.") + external_knowledge_api_id = data.get("external_knowledge_api_id", None) + if not external_knowledge_api_id: + raise ValueError("External knowledge api id is required.") + external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(dataset_id=dataset_id).first() if ( - data["embedding_model_provider"] != dataset.embedding_model_provider - or data["embedding_model"] != dataset.embedding_model + external_knowledge_binding.external_knowledge_id != external_knowledge_id + or external_knowledge_binding.external_knowledge_api_id != external_knowledge_api_id ): - action = "update" - try: - model_manager = ModelManager() - embedding_model = model_manager.get_model_instance( - tenant_id=current_user.current_tenant_id, - provider=data["embedding_model_provider"], - model_type=ModelType.TEXT_EMBEDDING, - model=data["embedding_model"], - ) - filtered_data["embedding_model"] = embedding_model.model - filtered_data["embedding_model_provider"] = embedding_model.provider - dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( - embedding_model.provider, embedding_model.model - ) - filtered_data["collection_binding_id"] = dataset_collection_binding.id - except LLMBadRequestError: - raise ValueError( - "No Embedding Model available. Please configure a valid provider " - "in the Settings -> Model Provider." - ) - except ProviderTokenNotInitError as ex: - raise ValueError(ex.description) + external_knowledge_binding.external_knowledge_id = external_knowledge_id + external_knowledge_binding.external_knowledge_api_id = external_knowledge_api_id + db.session.add(external_knowledge_binding) + db.session.commit() + else: + data.pop("partial_member_list", None) + filtered_data = {k: v for k, v in data.items() if v is not None or k == "description"} + action = None + if dataset.indexing_technique != data["indexing_technique"]: + # if update indexing_technique + if data["indexing_technique"] == "economy": + action = "remove" + filtered_data["embedding_model"] = None + filtered_data["embedding_model_provider"] = None + filtered_data["collection_binding_id"] = None + elif data["indexing_technique"] == "high_quality": + action = "add" + # get embedding model setting + try: + model_manager = ModelManager() + embedding_model = model_manager.get_model_instance( + tenant_id=current_user.current_tenant_id, + provider=data["embedding_model_provider"], + model_type=ModelType.TEXT_EMBEDDING, + model=data["embedding_model"], + ) + filtered_data["embedding_model"] = embedding_model.model + filtered_data["embedding_model_provider"] = embedding_model.provider + dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( + embedding_model.provider, embedding_model.model + ) + filtered_data["collection_binding_id"] = dataset_collection_binding.id + except LLMBadRequestError: + raise ValueError( + "No Embedding Model available. Please configure a valid provider " + "in the Settings -> Model Provider." + ) + except ProviderTokenNotInitError as ex: + raise ValueError(ex.description) + else: + if ( + data["embedding_model_provider"] != dataset.embedding_model_provider + or data["embedding_model"] != dataset.embedding_model + ): + action = "update" + try: + model_manager = ModelManager() + embedding_model = model_manager.get_model_instance( + tenant_id=current_user.current_tenant_id, + provider=data["embedding_model_provider"], + model_type=ModelType.TEXT_EMBEDDING, + model=data["embedding_model"], + ) + filtered_data["embedding_model"] = embedding_model.model + filtered_data["embedding_model_provider"] = embedding_model.provider + dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( + embedding_model.provider, embedding_model.model + ) + filtered_data["collection_binding_id"] = dataset_collection_binding.id + except LLMBadRequestError: + raise ValueError( + "No Embedding Model available. Please configure a valid provider " + "in the Settings -> Model Provider." + ) + except ProviderTokenNotInitError as ex: + raise ValueError(ex.description) - filtered_data["updated_by"] = user.id - filtered_data["updated_at"] = datetime.datetime.now() + filtered_data["updated_by"] = user.id + filtered_data["updated_at"] = datetime.datetime.now() - # update Retrieval model - filtered_data["retrieval_model"] = data["retrieval_model"] + # update Retrieval model + filtered_data["retrieval_model"] = data["retrieval_model"] - dataset.query.filter_by(id=dataset_id).update(filtered_data) + dataset.query.filter_by(id=dataset_id).update(filtered_data) - db.session.commit() - if action: - deal_dataset_vector_index_task.delay(dataset_id, action) + db.session.commit() + if action: + deal_dataset_vector_index_task.delay(dataset_id, action) return dataset @staticmethod From 02b06c420e244a429acb1e3183b6fc46fb482d07 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Tue, 24 Sep 2024 23:52:01 +0800 Subject: [PATCH 4/6] add external_retrieval_model --- api/fields/dataset_fields.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 7aee087d78..b32423f10c 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -38,6 +38,10 @@ dataset_retrieval_model_fields = { "score_threshold_enabled": fields.Boolean, "score_threshold": fields.Float, } +external_retrieval_model_fields = { + "top_k": fields.Integer, + "score_threshold": fields.Float, +} tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String} @@ -69,6 +73,7 @@ dataset_detail_fields = { "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields), "tags": fields.List(fields.Nested(tag_fields)), "external_knowledge_info": fields.Nested(external_knowledge_info_fields), + "external_retrieval_model": fields.Nested(external_retrieval_model_fields, allow_null=True), } dataset_query_detail_fields = { From a69dcb8bee379fa684ce05eee8137aecbae5de6c Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 25 Sep 2024 10:57:12 +0800 Subject: [PATCH 5/6] add external_retrieval_model --- api/controllers/console/datasets/external.py | 14 +++++++------- api/services/external_knowledge_service.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 1bc7ffdf49..faca8a2a45 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -101,8 +101,8 @@ class ExternalApiTemplateApi(Resource): @setup_required @login_required @account_initialization_required - def patch(self, api_template_id): - api_template_id = str(api_template_id) + def patch(self, external_knowledge_api_id): + external_knowledge_api_id = str(external_knowledge_api_id) parser = reqparse.RequestParser() parser.add_argument( @@ -125,7 +125,7 @@ class ExternalApiTemplateApi(Resource): api_template = ExternalDatasetService.update_api_template( tenant_id=current_user.current_tenant_id, user_id=current_user.id, - api_template_id=api_template_id, + external_knowledge_api_id=external_knowledge_api_id, args=args, ) @@ -134,15 +134,15 @@ class ExternalApiTemplateApi(Resource): @setup_required @login_required @account_initialization_required - def delete(self, api_template_id): - api_template_id = str(api_template_id) + def delete(self, external_knowledge_api_id): + external_knowledge_api_id = str(external_knowledge_api_id) # The role of the current user in the ta table must be admin, owner, or editor if not current_user.is_editor or current_user.is_dataset_operator: raise Forbidden() - ExternalDatasetService.delete_api_template(current_user.current_tenant_id, api_template_id) - return {"result": "success"}, 204 + ExternalDatasetService.delete_api_template(current_user.current_tenant_id, external_knowledge_api_id) + return {"result": "success"}, 200 class ExternalApiUseCheckApi(Resource): diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 58006ea088..9810c253ba 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -65,8 +65,8 @@ class ExternalDatasetService: return ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id).first() @staticmethod - def update_api_template(tenant_id, user_id, api_template_id, args) -> ExternalApiTemplates: - api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first() + def update_api_template(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalApiTemplates: + api_template = ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() if api_template is None: raise ValueError("api template not found") @@ -80,8 +80,8 @@ class ExternalDatasetService: return api_template @staticmethod - def delete_api_template(tenant_id: str, api_template_id: str): - api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first() + def delete_api_template(tenant_id: str, external_knowledge_api_id: str): + api_template = ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() if api_template is None: raise ValueError("api template not found") From c927c97310bc1a8a5fb53e87e59361ccf3a54b98 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 25 Sep 2024 12:37:23 +0800 Subject: [PATCH 6/6] update to external knowledge api --- api/controllers/console/datasets/datasets.py | 4 +- api/controllers/console/datasets/external.py | 32 +++--- .../service_api/dataset/dataset.py | 4 +- api/fields/external_dataset_fields.py | 2 +- ...-6af6a521a53e_update_retrieval_resource.py | 2 +- ...34-33f5fac87f29_external_knowledge_api.py} | 37 ++++--- api/models/dataset.py | 30 +++--- api/services/dataset_service.py | 10 +- .../external_knowledge_entities.py | 2 +- api/services/external_knowledge_service.py | 100 +++++++++--------- api/tasks/external_document_indexing_task.py | 18 ++-- 11 files changed, 120 insertions(+), 121 deletions(-) rename api/migrations/versions/{2024_09_18_0659-ec3df697ebbb_external_knowledge.py => 2024_09_25_0434-33f5fac87f29_external_knowledge_api.py} (74%) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 554a0bc0f9..d00dbbbea3 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -111,7 +111,7 @@ class DatasetListApi(Resource): help="Invalid indexing technique.", ) parser.add_argument( - "external_api_template_id", + "external_knowledge_api_id", type=str, nullable=True, required=False, @@ -144,7 +144,7 @@ class DatasetListApi(Resource): account=current_user, permission=DatasetPermissionEnum.ONLY_ME, provider=args["provider"], - external_api_template_id=args["external_api_template_id"], + external_knowledge_api_id=args["external_knowledge_api_id"], external_knowledge_id=args["external_knowledge_id"], ) except services.errors.dataset.DatasetNameDuplicateError: diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index faca8a2a45..4c840d8aba 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -37,12 +37,12 @@ class ExternalApiTemplateListApi(Resource): limit = request.args.get("limit", default=20, type=int) search = request.args.get("keyword", default=None, type=str) - api_templates, total = ExternalDatasetService.get_external_api_templates( + external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis( page, limit, current_user.current_tenant_id, search ) response = { - "data": [item.to_dict() for item in api_templates], - "has_more": len(api_templates) == limit, + "data": [item.to_dict() for item in external_knowledge_apis], + "has_more": len(external_knowledge_apis) == limit, "limit": limit, "total": total, "page": page, @@ -77,13 +77,13 @@ class ExternalApiTemplateListApi(Resource): raise Forbidden() try: - api_template = ExternalDatasetService.create_api_template( + external_knowledge_api = ExternalDatasetService.create_external_knowledge_api( tenant_id=current_user.current_tenant_id, user_id=current_user.id, args=args ) except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() - return api_template.to_dict(), 201 + return external_knowledge_api.to_dict(), 201 class ExternalApiTemplateApi(Resource): @@ -92,11 +92,11 @@ class ExternalApiTemplateApi(Resource): @account_initialization_required def get(self, external_knowledge_api_id): external_knowledge_api_id = str(external_knowledge_api_id) - api_template = ExternalDatasetService.get_api_template(external_knowledge_api_id) - if api_template is None: + external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id) + if external_knowledge_api is None: raise NotFound("API template not found.") - return api_template.to_dict(), 200 + return external_knowledge_api.to_dict(), 200 @setup_required @login_required @@ -122,14 +122,14 @@ class ExternalApiTemplateApi(Resource): args = parser.parse_args() ExternalDatasetService.validate_api_list(args["settings"]) - api_template = ExternalDatasetService.update_api_template( + external_knowledge_api = ExternalDatasetService.update_external_knowledge_api( tenant_id=current_user.current_tenant_id, user_id=current_user.id, external_knowledge_api_id=external_knowledge_api_id, args=args, ) - return api_template.to_dict(), 200 + return external_knowledge_api.to_dict(), 200 @setup_required @login_required @@ -141,7 +141,7 @@ class ExternalApiTemplateApi(Resource): if not current_user.is_editor or current_user.is_dataset_operator: raise Forbidden() - ExternalDatasetService.delete_api_template(current_user.current_tenant_id, external_knowledge_api_id) + ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id) return {"result": "success"}, 200 @@ -152,10 +152,10 @@ class ExternalApiUseCheckApi(Resource): def get(self, external_knowledge_api_id): external_knowledge_api_id = str(external_knowledge_api_id) - external_api_template_is_using, count = ExternalDatasetService.external_api_template_use_check( + external_knowledge_api_is_using, count = ExternalDatasetService.external_knowledge_api_use_check( external_knowledge_api_id ) - return {"is_using": external_api_template_is_using, "count": count}, 200 + return {"is_using": external_knowledge_api_is_using, "count": count}, 200 class ExternalDatasetInitApi(Resource): @@ -168,7 +168,7 @@ class ExternalDatasetInitApi(Resource): raise Forbidden() parser = reqparse.RequestParser() - parser.add_argument("api_template_id", type=str, required=True, nullable=True, location="json") + parser.add_argument("external_knowledge_api_id", type=str, required=True, nullable=True, location="json") # parser.add_argument('name', nullable=False, required=True, # help='name is required. Name must be between 1 to 100 characters.', # type=_validate_name) @@ -184,7 +184,7 @@ class ExternalDatasetInitApi(Resource): # validate args ExternalDatasetService.document_create_args_validate( - current_user.current_tenant_id, args["api_template_id"], args["process_parameter"] + current_user.current_tenant_id, args["external_knowledge_api_id"], args["process_parameter"] ) try: @@ -210,7 +210,7 @@ class ExternalDatasetCreateApi(Resource): raise Forbidden() parser = reqparse.RequestParser() - parser.add_argument("external_api_template_id", type=str, required=True, nullable=False, location="json") + parser.add_argument("external_knowledge_api_id", type=str, required=True, nullable=False, location="json") parser.add_argument("external_knowledge_id", type=str, required=True, nullable=False, location="json") parser.add_argument( "name", diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 7483b4b4d6..f076cff6c8 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -83,7 +83,7 @@ class DatasetListApi(DatasetApiResource): nullable=False, ) parser.add_argument( - "external_api_template_id", + "external_knowledge_api_id", type=str, nullable=True, required=False, @@ -112,7 +112,7 @@ class DatasetListApi(DatasetApiResource): account=current_user, permission=args["permission"], provider=args["provider"], - external_api_template_id=args["external_api_template_id"], + external_knowledge_api_id=args["external_knowledge_api_id"], external_knowledge_id=args["external_knowledge_id"], ) except services.errors.dataset.DatasetNameDuplicateError: diff --git a/api/fields/external_dataset_fields.py b/api/fields/external_dataset_fields.py index d287cbbe58..2281460fe2 100644 --- a/api/fields/external_dataset_fields.py +++ b/api/fields/external_dataset_fields.py @@ -2,7 +2,7 @@ from flask_restful import fields from libs.helper import TimestampField -api_template_query_detail_fields = { +external_knowledge_api_query_detail_fields = { "id": fields.String, "name": fields.String, "setting": fields.String, diff --git a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py index c79b7759db..5337b340db 100644 --- a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py +++ b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py @@ -12,7 +12,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = '6af6a521a53e' -down_revision = 'ec3df697ebbb' +down_revision = 'd57ba9ebb251' branch_labels = None depends_on = None diff --git a/api/migrations/versions/2024_09_18_0659-ec3df697ebbb_external_knowledge.py b/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py similarity index 74% rename from api/migrations/versions/2024_09_18_0659-ec3df697ebbb_external_knowledge.py rename to api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py index a5b7d47e67..3cb76e72c1 100644 --- a/api/migrations/versions/2024_09_18_0659-ec3df697ebbb_external_knowledge.py +++ b/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py @@ -1,8 +1,8 @@ -"""external_knowledge +"""external_knowledge_api -Revision ID: ec3df697ebbb -Revises: 675b5321501b -Create Date: 2024-09-18 06:59:54.048478 +Revision ID: 33f5fac87f29 +Revises: 6af6a521a53e +Create Date: 2024-09-25 04:34:57.249436 """ from alembic import op @@ -11,15 +11,15 @@ import sqlalchemy as sa from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = 'ec3df697ebbb' -down_revision = '675b5321501b' +revision = '33f5fac87f29' +down_revision = '6af6a521a53e' branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('external_api_templates', + op.create_table('external_knowledge_apis', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), sa.Column('name', sa.String(length=255), nullable=False), sa.Column('description', sa.String(length=255), nullable=False), @@ -29,16 +29,16 @@ def upgrade(): sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), sa.Column('updated_by', models.types.StringUUID(), nullable=True), sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='external_api_template_pkey') + sa.PrimaryKeyConstraint('id', name='external_knowledge_apis_pkey') ) - with op.batch_alter_table('external_api_templates', schema=None) as batch_op: - batch_op.create_index('external_api_templates_name_idx', ['name'], unique=False) - batch_op.create_index('external_api_templates_tenant_idx', ['tenant_id'], unique=False) + with op.batch_alter_table('external_knowledge_apis', schema=None) as batch_op: + batch_op.create_index('external_knowledge_apis_name_idx', ['name'], unique=False) + batch_op.create_index('external_knowledge_apis_tenant_idx', ['tenant_id'], unique=False) op.create_table('external_knowledge_bindings', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('external_api_template_id', models.types.StringUUID(), nullable=False), + sa.Column('external_knowledge_api_id', models.types.StringUUID(), nullable=False), sa.Column('dataset_id', models.types.StringUUID(), nullable=False), sa.Column('external_knowledge_id', sa.Text(), nullable=False), sa.Column('created_by', models.types.StringUUID(), nullable=False), @@ -49,7 +49,7 @@ def upgrade(): ) with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op: batch_op.create_index('external_knowledge_bindings_dataset_idx', ['dataset_id'], unique=False) - batch_op.create_index('external_knowledge_bindings_external_api_template_idx', ['external_api_template_id'], unique=False) + batch_op.create_index('external_knowledge_bindings_external_knowledge_api_idx', ['external_knowledge_api_id'], unique=False) batch_op.create_index('external_knowledge_bindings_external_knowledge_idx', ['external_knowledge_id'], unique=False) batch_op.create_index('external_knowledge_bindings_tenant_idx', ['tenant_id'], unique=False) @@ -58,17 +58,16 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op: batch_op.drop_index('external_knowledge_bindings_tenant_idx') batch_op.drop_index('external_knowledge_bindings_external_knowledge_idx') - batch_op.drop_index('external_knowledge_bindings_external_api_template_idx') + batch_op.drop_index('external_knowledge_bindings_external_knowledge_api_idx') batch_op.drop_index('external_knowledge_bindings_dataset_idx') op.drop_table('external_knowledge_bindings') - with op.batch_alter_table('external_api_templates', schema=None) as batch_op: - batch_op.drop_index('external_api_templates_tenant_idx') - batch_op.drop_index('external_api_templates_name_idx') + with op.batch_alter_table('external_knowledge_apis', schema=None) as batch_op: + batch_op.drop_index('external_knowledge_apis_tenant_idx') + batch_op.drop_index('external_knowledge_apis_name_idx') - op.drop_table('external_api_templates') + op.drop_table('external_knowledge_apis') # ### end Alembic commands ### diff --git a/api/models/dataset.py b/api/models/dataset.py index c61d467956..4224ee5e9c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -180,18 +180,18 @@ class Dataset(db.Model): ) if not external_knowledge_binding: return None - external_api_template = ( - db.session.query(ExternalApiTemplates) - .filter(ExternalApiTemplates.id == external_knowledge_binding.external_api_template_id) + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis) + .filter(ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id) .first() ) - if not external_api_template: + if not external_knowledge_api: return None return { "external_knowledge_id": external_knowledge_binding.external_knowledge_id, - "external_knowledge_api_id": external_api_template.id, - "external_knowledge_api_name": external_api_template.name, - "external_knowledge_api_endpoint": json.loads(external_api_template.settings).get("endpoint", ""), + "external_knowledge_api_id": external_knowledge_api.id, + "external_knowledge_api_name": external_knowledge_api.name, + "external_knowledge_api_endpoint": json.loads(external_knowledge_api.settings).get("endpoint", ""), } @staticmethod @@ -721,12 +721,12 @@ class DatasetPermission(db.Model): created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)")) -class ExternalApiTemplates(db.Model): - __tablename__ = "external_api_templates" +class ExternalKnowledgeApis(db.Model): + __tablename__ = "external_knowledge_apis" __table_args__ = ( - db.PrimaryKeyConstraint("id", name="external_api_template_pkey"), - db.Index("external_api_templates_tenant_idx", "tenant_id"), - db.Index("external_api_templates_name_idx", "name"), + db.PrimaryKeyConstraint("id", name="external_knowledge_apis_pkey"), + db.Index("external_knowledge_apis_tenant_idx", "tenant_id"), + db.Index("external_knowledge_apis_name_idx", "name"), ) id = db.Column(StringUUID, nullable=False, server_default=db.text("uuid_generate_v4()")) @@ -762,7 +762,7 @@ class ExternalApiTemplates(db.Model): def dataset_bindings(self): external_knowledge_bindings = ( db.session.query(ExternalKnowledgeBindings) - .filter(ExternalKnowledgeBindings.external_api_template_id == self.id) + .filter(ExternalKnowledgeBindings.external_knowledge_api_id == self.id) .all() ) dataset_ids = [binding.dataset_id for binding in external_knowledge_bindings] @@ -781,12 +781,12 @@ class ExternalKnowledgeBindings(db.Model): db.Index("external_knowledge_bindings_tenant_idx", "tenant_id"), db.Index("external_knowledge_bindings_dataset_idx", "dataset_id"), db.Index("external_knowledge_bindings_external_knowledge_idx", "external_knowledge_id"), - db.Index("external_knowledge_bindings_external_api_template_idx", "external_api_template_id"), + db.Index("external_knowledge_bindings_external_knowledge_api_idx", "external_knowledge_api_id"), ) id = db.Column(StringUUID, nullable=False, server_default=db.text("uuid_generate_v4()")) tenant_id = db.Column(StringUUID, nullable=False) - external_api_template_id = db.Column(StringUUID, nullable=False) + external_knowledge_api_id = db.Column(StringUUID, nullable=False) dataset_id = db.Column(StringUUID, nullable=False) external_knowledge_id = db.Column(db.Text, nullable=False) created_by = db.Column(StringUUID, nullable=False) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index a3a5d7b84f..c165def6d5 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -143,7 +143,7 @@ class DatasetService: account: Account, permission: Optional[str] = None, provider: str = "vendor", - external_api_template_id: Optional[str] = None, + external_knowledge_api_id: Optional[str] = None, external_knowledge_id: Optional[str] = None, ): # check if dataset name already exists @@ -167,14 +167,14 @@ class DatasetService: db.session.add(dataset) db.session.flush() - if provider == "external" and external_api_template_id: - external_api_template = ExternalDatasetService.get_api_template(external_api_template_id) - if not external_api_template: + if provider == "external" and external_knowledge_api_id: + external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id) + if not external_knowledge_api: raise ValueError("External API template not found.") external_knowledge_binding = ExternalKnowledgeBindings( tenant_id=tenant_id, dataset_id=dataset.id, - external_api_template_id=external_api_template_id, + external_knowledge_api_id=external_knowledge_api_id, external_knowledge_id=external_knowledge_id, created_by=account.id, ) diff --git a/api/services/entities/external_knowledge_entities/external_knowledge_entities.py b/api/services/entities/external_knowledge_entities/external_knowledge_entities.py index fee258dd22..4545f385eb 100644 --- a/api/services/entities/external_knowledge_entities/external_knowledge_entities.py +++ b/api/services/entities/external_knowledge_entities/external_knowledge_entities.py @@ -19,7 +19,7 @@ class ProcessStatusSetting(BaseModel): url: str -class ApiTemplateSetting(BaseModel): +class ExternalKnowledgeApiSetting(BaseModel): url: str request_method: str headers: Optional[dict] = None diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 9810c253ba..85ebd27eb2 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -15,26 +15,26 @@ from extensions.ext_database import db from models.dataset import ( Dataset, Document, - ExternalApiTemplates, + ExternalKnowledgeApis, ExternalKnowledgeBindings, ) from models.model import UploadFile -from services.entities.external_knowledge_entities.external_knowledge_entities import ApiTemplateSetting, Authorization +from services.entities.external_knowledge_entities.external_knowledge_entities import ExternalKnowledgeApiSetting, Authorization from services.errors.dataset import DatasetNameDuplicateError class ExternalDatasetService: @staticmethod - def get_external_api_templates(page, per_page, tenant_id, search=None) -> tuple[list[ExternalApiTemplates], int]: - query = ExternalApiTemplates.query.filter(ExternalApiTemplates.tenant_id == tenant_id).order_by( - ExternalApiTemplates.created_at.desc() + def get_external_knowledge_api(page, per_page, tenant_id, search=None) -> tuple[list[ExternalKnowledgeApis], int]: + query = ExternalKnowledgeApis.query.filter(ExternalKnowledgeApis.tenant_id == tenant_id).order_by( + ExternalKnowledgeApis.created_at.desc() ) if search: - query = query.filter(ExternalApiTemplates.name.ilike(f"%{search}%")) + query = query.filter(ExternalKnowledgeApis.name.ilike(f"%{search}%")) - api_templates = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False) + external_knowledge_apis = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False) - return api_templates.items, api_templates.total + return external_knowledge_apis.items, external_knowledge_apis.total @classmethod def validate_api_list(cls, api_settings: dict): @@ -46,8 +46,8 @@ class ExternalDatasetService: raise ValueError("api_key is required") @staticmethod - def create_api_template(tenant_id: str, user_id: str, args: dict) -> ExternalApiTemplates: - api_template = ExternalApiTemplates( + def create_external_knowledge_api(tenant_id: str, user_id: str, args: dict) -> ExternalKnowledgeApis: + external_knowledge_api = ExternalKnowledgeApis( tenant_id=tenant_id, created_by=user_id, updated_by=user_id, @@ -56,41 +56,41 @@ class ExternalDatasetService: settings=json.dumps(args.get("settings"), ensure_ascii=False), ) - db.session.add(api_template) + db.session.add(external_knowledge_api) db.session.commit() - return api_template + return external_knowledge_api @staticmethod - def get_api_template(external_knowledge_api_id: str) -> ExternalApiTemplates: - return ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id).first() + def get_external_knowledge_api(external_knowledge_api_id: str) -> ExternalKnowledgeApis: + return ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id).first() @staticmethod - def update_api_template(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalApiTemplates: - api_template = ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() - if api_template is None: + def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis: + external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + if external_knowledge_api is None: raise ValueError("api template not found") - api_template.name = args.get("name") - api_template.description = args.get("description", "") - api_template.settings = json.dumps(args.get("settings"), ensure_ascii=False) - api_template.updated_by = user_id - api_template.updated_at = datetime.now(timezone.utc).replace(tzinfo=None) + external_knowledge_api.name = args.get("name") + external_knowledge_api.description = args.get("description", "") + external_knowledge_api.settings = json.dumps(args.get("settings"), ensure_ascii=False) + external_knowledge_api.updated_by = user_id + external_knowledge_api.updated_at = datetime.now(timezone.utc).replace(tzinfo=None) db.session.commit() - return api_template + return external_knowledge_api @staticmethod - def delete_api_template(tenant_id: str, external_knowledge_api_id: str): - api_template = ExternalApiTemplates.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() - if api_template is None: + def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str): + external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + if external_knowledge_api is None: raise ValueError("api template not found") - db.session.delete(api_template) + db.session.delete(external_knowledge_api) db.session.commit() @staticmethod - def external_api_template_use_check(external_knowledge_api_id: str) -> tuple[bool, int]: - count = ExternalKnowledgeBindings.query.filter_by(external_api_template_id=external_knowledge_api_id).count() + def external_knowledge_api_use_check(external_knowledge_api_id: str) -> tuple[bool, int]: + count = ExternalKnowledgeBindings.query.filter_by(external_knowledge_api_id=external_knowledge_api_id).count() if count > 0: return True, count return False, 0 @@ -105,11 +105,11 @@ class ExternalDatasetService: return external_knowledge_binding @staticmethod - def document_create_args_validate(tenant_id: str, api_template_id: str, process_parameter: dict): - api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first() - if api_template is None: + def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict): + external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + if external_knowledge_api is None: raise ValueError("api template not found") - settings = json.loads(api_template.settings) + settings = json.loads(external_knowledge_api.settings) for setting in settings: custom_parameters = setting.get("document_process_setting") if custom_parameters: @@ -119,15 +119,15 @@ class ExternalDatasetService: @staticmethod def init_external_dataset(tenant_id: str, user_id: str, args: dict, created_from: str = "web"): - api_template_id = args.get("api_template_id") + external_knowledge_api_id = args.get("external_knowledge_api_id") data_source = args.get("data_source") if data_source is None: raise ValueError("data source is required") process_parameter = args.get("process_parameter") - api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first() - if api_template is None: + external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + if external_knowledge_api is None: raise ValueError("api template not found") dataset = Dataset( @@ -175,12 +175,12 @@ class ExternalDatasetService: db.session.flush() document_ids.append(document.id) db.session.commit() - # external_document_indexing_task.delay(dataset.id, api_template_id, data_source, process_parameter) + # external_document_indexing_task.delay(dataset.id, external_knowledge_api_id, data_source, process_parameter) return dataset @staticmethod - def process_external_api(settings: ApiTemplateSetting, files: Union[None, dict[str, Any]]) -> httpx.Response: + def process_external_api(settings: ExternalKnowledgeApiSetting, files: Union[None, dict[str, Any]]) -> httpx.Response: """ do http request depending on api bundle """ @@ -222,19 +222,19 @@ class ExternalDatasetService: return headers @staticmethod - def get_api_template_settings(settings: dict) -> ApiTemplateSetting: - return ApiTemplateSetting.parse_obj(settings) + def get_external_knowledge_api_settings(settings: dict) -> ExternalKnowledgeApiSetting: + return ExternalKnowledgeApiSetting.parse_obj(settings) @staticmethod def create_external_dataset(tenant_id: str, user_id: str, args: dict) -> Dataset: # check if dataset name already exists if Dataset.query.filter_by(name=args.get("name"), tenant_id=tenant_id).first(): raise DatasetNameDuplicateError(f"Dataset with name {args.get('name')} already exists.") - api_template = ExternalApiTemplates.query.filter_by( - id=args.get("external_api_template_id"), tenant_id=tenant_id + external_knowledge_api = ExternalKnowledgeApis.query.filter_by( + id=args.get("external_knowledge_api_id"), tenant_id=tenant_id ).first() - if api_template is None: + if external_knowledge_api is None: raise ValueError("api template not found") dataset = Dataset( @@ -252,7 +252,7 @@ class ExternalDatasetService: external_knowledge_binding = ExternalKnowledgeBindings( tenant_id=tenant_id, dataset_id=dataset.id, - external_api_template_id=args.get("external_api_template_id"), + external_knowledge_api_id=args.get("external_knowledge_api_id"), external_knowledge_id=args.get("external_knowledge_id"), created_by=user_id, ) @@ -272,13 +272,13 @@ class ExternalDatasetService: if not external_knowledge_binding: raise ValueError("external knowledge binding not found") - external_api_template = ExternalApiTemplates.query.filter_by( - id=external_knowledge_binding.external_api_template_id + external_knowledge_api = ExternalKnowledgeApis.query.filter_by( + id=external_knowledge_binding.external_knowledge_api_id ).first() - if not external_api_template: + if not external_knowledge_api: raise ValueError("external api template not found") - settings = json.loads(external_api_template.settings) + settings = json.loads(external_knowledge_api.settings) headers = {"Content-Type": "application/json"} if settings.get("api_key"): headers["Authorization"] = f"Bearer {settings.get('api_key')}" @@ -286,13 +286,13 @@ class ExternalDatasetService: external_retrieval_parameters["query"] = query external_retrieval_parameters["external_knowledge_id"] = external_knowledge_binding.external_knowledge_id - api_template_setting = { + external_knowledge_api_setting = { "url": f"{settings.get('endpoint')}/dify/external-knowledge/retrieval-documents", "request_method": "post", "headers": headers, "params": external_retrieval_parameters, } - response = ExternalDatasetService.process_external_api(ApiTemplateSetting(**api_template_setting), None) + response = ExternalDatasetService.process_external_api(ExternalKnowledgeApiSetting(**external_knowledge_api_setting), None) if response.status_code == 200: return response.json() return [] diff --git a/api/tasks/external_document_indexing_task.py b/api/tasks/external_document_indexing_task.py index 987b72e25e..bfd2f155f7 100644 --- a/api/tasks/external_document_indexing_task.py +++ b/api/tasks/external_document_indexing_task.py @@ -8,17 +8,17 @@ from celery import shared_task from core.indexing_runner import DocumentIsPausedException from extensions.ext_database import db from extensions.ext_storage import storage -from models.dataset import Dataset, ExternalApiTemplates +from models.dataset import Dataset, ExternalKnowledgeApis from models.model import UploadFile from services.external_knowledge_service import ExternalDatasetService @shared_task(queue="dataset") -def external_document_indexing_task(dataset_id: str, api_template_id: str, data_source: dict, process_parameter: dict): +def external_document_indexing_task(dataset_id: str, external_knowledge_api_id: str, data_source: dict, process_parameter: dict): """ Async process document :param dataset_id: - :param api_template_id: + :param external_knowledge_api_id: :param data_source: :param process_parameter: Usage: external_document_indexing_task.delay(dataset_id, document_id) @@ -33,16 +33,16 @@ def external_document_indexing_task(dataset_id: str, api_template_id: str, data_ return # get external api template - api_template = ( - db.session.query(ExternalApiTemplates) - .filter(ExternalApiTemplates.id == api_template_id, ExternalApiTemplates.tenant_id == dataset.tenant_id) + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis) + .filter(ExternalKnowledgeApis.id == external_knowledge_api_id, ExternalKnowledgeApis.tenant_id == dataset.tenant_id) .first() ) - if not api_template: + if not external_knowledge_api: logging.info( click.style( - "Processed external dataset: {} failed, api template: {} not exit.".format(dataset_id, api_template_id), + "Processed external dataset: {} failed, api template: {} not exit.".format(dataset_id, external_knowledge_api_id), fg="red", ) ) @@ -59,7 +59,7 @@ def external_document_indexing_task(dataset_id: str, api_template_id: str, data_ if file: files[file.id] = (file.name, storage.load_once(file.key), file.mime_type) try: - settings = ExternalDatasetService.get_api_template_settings(json.loads(api_template.settings)) + settings = ExternalDatasetService.get_external_knowledge_api_settings(json.loads(external_knowledge_api.settings)) # assemble headers headers = ExternalDatasetService.assembling_headers(settings.authorization, settings.headers)