From 8e73844781d84a8e4f00eb833802fffd57edbb56 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Fri, 27 Sep 2024 16:02:59 +0800 Subject: [PATCH] update to external knowledge api --- .../console/datasets/test_external.py | 2 +- api/poetry.lock | 15 +---------- api/pyproject.toml | 1 + api/services/external_knowledge_service.py | 26 ++++++++++++++++++- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/api/controllers/console/datasets/test_external.py b/api/controllers/console/datasets/test_external.py index 044aceb0c7..da57cb338a 100644 --- a/api/controllers/console/datasets/test_external.py +++ b/api/controllers/console/datasets/test_external.py @@ -39,4 +39,4 @@ class TestExternalApi(Resource): return result, 200 -api.add_resource(TestExternalApi, "/dify/external-knowledge/retrieval-documents") +api.add_resource(TestExternalApi, "/retrieval") diff --git a/api/poetry.lock b/api/poetry.lock index 184cdb9e81..d32dcebc19 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -6641,19 +6641,6 @@ files = [ {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, - {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, - {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, - {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, ] [package.dependencies] @@ -10498,4 +10485,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "17c4108d92c415d987f8b437ea3e0484c5601a05bfe175339a8546c93c159bc5" +content-hash = "1f9d36b61528276a0761d87ef4f9fa787b5c1b49ae85b238c86626fa1110e2e8" diff --git a/api/pyproject.toml b/api/pyproject.toml index 9e38c09456..c3b4ca300f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -220,6 +220,7 @@ volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"} oci = "^2.133.0" tos = "^2.7.1" nomic = "^3.1.2" +validators = "0.21.0" [tool.poetry.group.indriect.dependencies] kaleido = "0.2.1" rank-bm25 = "~0.2.2" diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index ca7bdc1439..5fc789ded0 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -7,6 +7,7 @@ from typing import Any, Optional, Union import boto3 import httpx +import validators # from tasks.external_document_indexing_task import external_document_indexing_task from configs import dify_config @@ -21,7 +22,7 @@ from models.dataset import ( from models.model import UploadFile from services.entities.external_knowledge_entities.external_knowledge_entities import ExternalKnowledgeApiSetting, Authorization from services.errors.dataset import DatasetNameDuplicateError - +from urllib.parse import urlparse class ExternalDatasetService: @staticmethod @@ -47,6 +48,7 @@ class ExternalDatasetService: @staticmethod def create_external_knowledge_api(tenant_id: str, user_id: str, args: dict) -> ExternalKnowledgeApis: + ExternalDatasetService.check_endpoint_and_api_key(args.get("settings")) external_knowledge_api = ExternalKnowledgeApis( tenant_id=tenant_id, created_by=user_id, @@ -59,6 +61,28 @@ class ExternalDatasetService: db.session.add(external_knowledge_api) db.session.commit() return external_knowledge_api + + @staticmethod + def check_endpoint_and_api_key(settings: dict): + if "endpoint" not in settings or not settings["endpoint"]: + raise ValueError("endpoint is required") + if "api_key" not in settings or not settings["api_key"]: + raise ValueError("api_key is required") + + endpoint = f"{settings['endpoint']}/retrieval" + api_key = settings["api_key"] + if not validators.url(endpoint): + raise ValueError(f"invalid endpoint: {endpoint}") + try: + response = httpx.post(endpoint, headers={"Authorization": f"Bearer {api_key}"}) + except Exception as e: + raise ValueError(f"failed to connect to the endpoint: {endpoint}") + if response.status_code == 502: + raise ValueError(f"Bad Gateway: failed to connect to the endpoint: {endpoint}") + if response.status_code == 404: + raise ValueError(f"Not Found: failed to connect to the endpoint: {endpoint}") + if response.status_code == 403: + raise ValueError(f"Forbidden: Authorization failed with api_key: {api_key}") @staticmethod def get_external_knowledge_api(external_knowledge_api_id: str) -> ExternalKnowledgeApis: