Plugin/merge main to plugin/beta 20250122 (#12962)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: kurokobo <kuro664@gmail.com>
Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com>
Co-authored-by: NFish <douxc512@gmail.com>
Co-authored-by: Gen Sato <52241300+halogen22@users.noreply.github.com>
Co-authored-by: eux <euxuuu@gmail.com>
Co-authored-by: huangzhuo1949 <167434202+huangzhuo1949@users.noreply.github.com>
Co-authored-by: huangzhuo <huangzhuo1@xiaomi.com>
Co-authored-by: lotsik <lotsik@mail.ru>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: gakkiyomi <gakkiyomi@aliyun.com>
Co-authored-by: CN-P5 <heibai2006@gmail.com>
Co-authored-by: CN-P5 <heibai2006@qq.com>
Co-authored-by: Chuehnone <1897025+chuehnone@users.noreply.github.com>
Co-authored-by: yihong <zouzou0208@gmail.com>
Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: Boris Feld <lothiraldan@gmail.com>
Co-authored-by: mbo <himabo@gmail.com>
Co-authored-by: mabo <mabo@aeyes.ai>
Co-authored-by: Warren Chen <warren.chen830@gmail.com>
Co-authored-by: KVOJJJin <jzongcode@gmail.com>
Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com>
Co-authored-by: jiandanfeng <chenjh3@wangsu.com>
Co-authored-by: zhu-an <70234959+xhdd123321@users.noreply.github.com>
Co-authored-by: zhaoqingyu.1075 <zhaoqingyu.1075@bytedance.com>
Co-authored-by: 海狸大師 <86974027+yenslife@users.noreply.github.com>
Co-authored-by: Xu Song <xusong.vip@gmail.com>
Co-authored-by: rayshaw001 <396301947@163.com>
Co-authored-by: Ding Jiatong <dingjiatong@gmail.com>
Co-authored-by: Bowen Liang <liangbowen@gf.com.cn>
Co-authored-by: JasonVV <jasonwangiii@outlook.com>
Co-authored-by: le0zh <newlight@qq.com>
Co-authored-by: zhuxinliang <zhuxinliang@didiglobal.com>
Co-authored-by: k-zaku <zaku99@outlook.jp>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: luckylhb90 <luckylhb90@gmail.com>
Co-authored-by: hobo.l <hobo.l@binance.com>
Co-authored-by: jiangbo721 <365065261@qq.com>
Co-authored-by: 刘江波 <jiangbo721@163.com>
Co-authored-by: Shun Miyazawa <34241526+miya@users.noreply.github.com>
Co-authored-by: EricPan <30651140+Egfly@users.noreply.github.com>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: sino <sino2322@gmail.com>
Co-authored-by: Jhvcc <37662342+Jhvcc@users.noreply.github.com>
Co-authored-by: lowell <lowell.hu@zkteco.in>
Yeuoly 2025-01-23 14:48:16 +08:00 committed by GitHub
parent 8d8d3e3f2f
commit 899df30bf6
238 changed files with 4121 additions and 890 deletions

View File

@ -8,7 +8,7 @@ inputs:
poetry-version:
description: Poetry version to set up
required: true
default: '1.8.4'
default: '2.0.1'
poetry-lockfile:
description: Path to the Poetry lockfile to restore cache from
required: true

View File

@ -43,19 +43,17 @@ jobs:
run: poetry install -C api --with dev
- name: Check dependencies in pyproject.toml
run: poetry run -C api bash dev/pytest/pytest_artifacts.sh
run: poetry run -P api bash dev/pytest/pytest_artifacts.sh
- name: Run Unit tests
run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh
run: poetry run -P api bash dev/pytest/pytest_unit_tests.sh
- name: Run dify config tests
run: poetry run -C api python dev/pytest/pytest_config_tests.py
run: poetry run -P api python dev/pytest/pytest_config_tests.py
- name: Run mypy
run: |
pushd api
poetry run python -m mypy --install-types --non-interactive .
popd
poetry run -C api python -m mypy --install-types --non-interactive .
- name: Set up dotenvs
run: |
@ -75,4 +73,4 @@ jobs:
ssrf_proxy
- name: Run Workflow
run: poetry run -C api bash dev/pytest/pytest_workflow.sh
run: poetry run -P api bash dev/pytest/pytest_workflow.sh

View File

@ -39,12 +39,12 @@ jobs:
if: steps.changed-files.outputs.any_changed == 'true'
run: |
poetry run -C api ruff --version
poetry run -C api ruff check ./api
poetry run -C api ruff format --check ./api
poetry run -C api ruff check ./
poetry run -C api ruff format --check ./
- name: Dotenv check
if: steps.changed-files.outputs.any_changed == 'true'
run: poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example
run: poetry run -P api dotenv-linter ./api/.env.example ./web/.env.example
- name: Lint hints
if: failure()
@ -87,7 +87,35 @@ jobs:
- name: Web style check
if: steps.changed-files.outputs.any_changed == 'true'
run: echo "${{ steps.changed-files.outputs.all_changed_files }}" | sed 's|web/||g' | xargs pnpm eslint # wait for next lint support eslint v9
run: yarn run lint
docker-compose-template:
name: Docker Compose Template
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: |
docker/generate_docker_compose
docker/.env.example
docker/docker-compose-template.yaml
docker/docker-compose.yaml
- name: Generate Docker Compose
if: steps.changed-files.outputs.any_changed == 'true'
run: |
cd docker
./generate_docker_compose
- name: Check for changes
if: steps.changed-files.outputs.any_changed == 'true'
run: git diff --exit-code
superlinter:
name: SuperLinter

View File

@ -70,4 +70,4 @@ jobs:
tidb
- name: Test Vector Stores
run: poetry run -C api bash dev/pytest/pytest_vdb.sh
run: poetry run -P api bash dev/pytest/pytest_vdb.sh

View File

@ -53,10 +53,12 @@ ignore = [
"FURB152", # math-constant
"UP007", # non-pep604-annotation
"UP032", # f-string
"UP045", # non-pep604-annotation-optional
"B005", # strip-with-multi-characters
"B006", # mutable-argument-default
"B007", # unused-loop-control-variable
"B026", # star-arg-unpacking-after-keyword-arg
"B903", # class-as-data-structure
"B904", # raise-without-from-inside-except
"B905", # zip-without-explicit-strict
"N806", # non-lowercase-variable-in-function

View File

@ -4,7 +4,7 @@ FROM python:3.12-slim-bookworm AS base
WORKDIR /app/api
# Install Poetry
ENV POETRY_VERSION=1.8.4
ENV POETRY_VERSION=2.0.1
# if you located in China, you can use aliyun mirror to speed up
# RUN pip install --no-cache-dir poetry==${POETRY_VERSION} -i https://mirrors.aliyun.com/pypi/simple/

View File

@ -79,5 +79,5 @@
2. Run the tests locally with mocked system environment variables in the `tool.pytest_env` section of `pyproject.toml`
```bash
poetry run -C api bash dev/pytest/pytest_all_tests.sh
poetry run -P api bash dev/pytest/pytest_all_tests.sh
```

View File

@ -200,7 +200,7 @@ class EndpointConfig(BaseSettings):
)
CONSOLE_WEB_URL: str = Field(
description="Base URL for the console web interface," "used for frontend references and CORS configuration",
description="Base URL for the console web interface,used for frontend references and CORS configuration",
default="",
)

View File

@ -181,7 +181,7 @@ class HostedFetchAppTemplateConfig(BaseSettings):
"""
HOSTED_FETCH_APP_TEMPLATES_MODE: str = Field(
description="Mode for fetching app templates: remote, db, or builtin" " default to remote,",
description="Mode for fetching app templates: remote, db, or builtin default to remote,",
default="remote",
)

View File

@ -59,7 +59,7 @@ class InsertExploreAppListApi(Resource):
with Session(db.engine) as session:
app = session.execute(select(App).filter(App.id == args["app_id"])).scalar_one_or_none()
if not app:
raise NotFound(f'App \'{args["app_id"]}\' is not found')
raise NotFound(f"App '{args['app_id']}' is not found")
site = app.site
if not site:

View File

@ -22,7 +22,7 @@ from controllers.console.wraps import account_initialization_required, setup_req
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from libs.login import login_required
from models.model import AppMode
from models import App, AppMode
from services.audio_service import AudioService
from services.errors.audio import (
AudioTooLargeServiceError,
@ -79,7 +79,7 @@ class ChatMessageTextApi(Resource):
@login_required
@account_initialization_required
@get_app_model
def post(self, app_model):
def post(self, app_model: App):
from werkzeug.exceptions import InternalServerError
try:
@ -98,9 +98,13 @@ class ChatMessageTextApi(Resource):
and app_model.workflow.features_dict
):
text_to_speech = app_model.workflow.features_dict.get("text_to_speech")
if text_to_speech is None:
raise ValueError("TTS is not enabled")
voice = args.get("voice") or text_to_speech.get("voice")
else:
try:
if app_model.app_model_config is None:
raise ValueError("AppModelConfig not found")
voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
except Exception:
voice = None

View File

@ -52,12 +52,12 @@ class DatasetListApi(Resource):
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
include_all = request.args.get("include_all", default="false").lower() == "true"
if ids:
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(
page, limit, current_user.current_tenant_id, current_user, search, tag_ids
page, limit, current_user.current_tenant_id, current_user, search, tag_ids, include_all
)
# check embedding setting
@ -457,7 +457,7 @@ class DatasetIndexingEstimateApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider " "in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -619,8 +619,7 @@ class DatasetRetrievalSettingApi(Resource):
vector_type = dify_config.VECTOR_STORE
match vector_type:
case (
VectorType.MILVUS
| VectorType.RELYT
VectorType.RELYT
| VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
@ -645,6 +644,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.TIDB_ON_QDRANT
| VectorType.LINDORM
| VectorType.COUCHBASE
| VectorType.MILVUS
):
return {
"retrieval_method": [

View File

@ -362,8 +362,7 @@ class DatasetInitApi(Resource):
)
except InvokeAuthorizationError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -540,8 +539,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
return response.model_dump(), 200
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)

View File

@ -168,8 +168,7 @@ class DatasetDocumentSegmentApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -217,8 +216,7 @@ class DatasetDocumentSegmentAddApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -267,8 +265,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -368,9 +365,9 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
result = []
for index, row in df.iterrows():
if document.doc_form == "qa_model":
data = {"content": row[0], "answer": row[1]}
data = {"content": row.iloc[0], "answer": row.iloc[1]}
else:
data = {"content": row[0]}
data = {"content": row.iloc[0]}
result.append(data)
if len(result) == 0:
raise ValueError("The CSV file is empty.")
@ -437,8 +434,7 @@ class ChildChunkAddApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
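
The `row[0]` → `row.iloc[0]` changes above track pandas' deprecation of the positional fallback in label-based `Series` indexing: with `DataFrame.iterrows()` each row is a label-indexed `Series`, so an integer key is treated as a label. A minimal sketch of the difference (column names are hypothetical):

```python
import pandas as pd

df = pd.DataFrame({"question": ["What is Dify?"], "answer": ["An LLM app platform."]})

for _, row in df.iterrows():
    # row is a Series indexed by column labels ("question", "answer"), so
    # row[0] is a label lookup; .iloc[0] is unambiguous positional access.
    data = {"content": row.iloc[0], "answer": row.iloc[1]}
    print(data)
```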

View File

@ -32,7 +32,7 @@ class ConversationListApi(InstalledAppResource):
pinned = None
if "pinned" in args and args["pinned"] is not None:
pinned = True if args["pinned"] == "true" else False
pinned = args["pinned"] == "true"
try:
with Session(db.engine) as session:

View File

@ -7,4 +7,4 @@ api = ExternalApi(bp)
from . import index
from .app import app, audio, completion, conversation, file, message, workflow
from .dataset import dataset, document, hit_testing, segment
from .dataset import dataset, document, hit_testing, segment, upload_file

View File

@ -31,8 +31,11 @@ class DatasetListApi(DatasetApiResource):
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
include_all = request.args.get("include_all", default="false").lower() == "true"
datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
datasets, total = DatasetService.get_datasets(
page, limit, tenant_id, current_user, search, tag_ids, include_all
)
# check embedding setting
provider_manager = ProviderManager()
configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)

View File

@ -53,8 +53,7 @@ class SegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -95,8 +94,7 @@ class SegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@ -175,8 +173,7 @@ class DatasetSegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)

View File

@ -0,0 +1,54 @@
from werkzeug.exceptions import NotFound
from controllers.service_api import api
from controllers.service_api.wraps import (
DatasetApiResource,
)
from core.file import helpers as file_helpers
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import UploadFile
from services.dataset_service import DocumentService
class UploadFileApi(DatasetApiResource):
def get(self, tenant_id, dataset_id, document_id):
"""Get upload file."""
# check dataset
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise NotFound("Dataset not found.")
# check document
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
# check upload file
if document.data_source_type != "upload_file":
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
data_source_info = document.data_source_info_dict
if data_source_info and "upload_file_id" in data_source_info:
file_id = data_source_info["upload_file_id"]
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("UploadFile not found.")
else:
raise ValueError("Upload file id not found in document data source info.")
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": url,
"download_url": f"{url}&as_attachment=true",
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at.timestamp(),
}, 200
api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")

View File

@ -195,7 +195,11 @@ def validate_and_get_api_token(scope: str | None = None):
with Session(db.engine, expire_on_commit=False) as session:
update_stmt = (
update(ApiToken)
.where(ApiToken.token == auth_token, ApiToken.last_used_at < cutoff_time, ApiToken.type == scope)
.where(
ApiToken.token == auth_token,
(ApiToken.last_used_at.is_(None) | (ApiToken.last_used_at < cutoff_time)),
ApiToken.type == scope,
)
.values(last_used_at=current_time)
.returning(ApiToken)
)
@ -236,7 +240,7 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type="service_api",
is_anonymous=True if user_id == "DEFAULT-USER" else False,
is_anonymous=user_id == "DEFAULT-USER",
session_id=user_id,
)
db.session.add(end_user)
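
The widened `WHERE` clause above matters because of SQL's three-valued logic: `last_used_at < cutoff_time` evaluates to NULL rather than true when `last_used_at IS NULL`, so never-used tokens were skipped by the refresh. A self-contained sketch of the predicate, using a minimal stand-in model on in-memory SQLite:

```python
from datetime import datetime, timedelta

from sqlalchemy import Column, DateTime, Integer, String, create_engine, update
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class ApiToken(Base):  # minimal stand-in for the real model
    __tablename__ = "api_tokens"
    id = Column(Integer, primary_key=True)
    token = Column(String)
    type = Column(String)
    last_used_at = Column(DateTime, nullable=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

now = datetime.now()
cutoff = now - timedelta(minutes=10)

with Session(engine) as session:
    session.add_all([
        ApiToken(token="t1", type="app", last_used_at=None),  # never used
        ApiToken(token="t2", type="app", last_used_at=now - timedelta(hours=1)),
    ])
    # Without the IS NULL disjunct, t1 would never match the filter.
    session.execute(
        update(ApiToken)
        .where(
            ApiToken.type == "app",
            ApiToken.last_used_at.is_(None) | (ApiToken.last_used_at < cutoff),
        )
        .values(last_used_at=now)
    )
    session.commit()
```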

View File

@ -39,7 +39,7 @@ class ConversationListApi(WebApiResource):
pinned = None
if "pinned" in args and args["pinned"] is not None:
pinned = True if args["pinned"] == "true" else False
pinned = args["pinned"] == "true"
try:
with Session(db.engine) as session:

View File

@ -168,7 +168,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
self.save_agent_thought(
agent_thought=agent_thought,
tool_name=scratchpad.action.action_name if scratchpad.action else "",
tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""),
tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {},
tool_invoke_meta={},
thought=scratchpad.thought or "",

View File

@ -167,8 +167,7 @@ class AppQueueManager:
else:
if isinstance(data, DeclarativeMeta) or hasattr(data, "_sa_instance_state"):
raise TypeError(
"Critical Error: Passing SQLAlchemy Model instances "
"that cause thread safety issues is not allowed."
"Critical Error: Passing SQLAlchemy Model instances that cause thread safety issues is not allowed."
)

View File

@ -89,6 +89,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
Conversation.id == conversation_id,
Conversation.app_id == app_model.id,
Conversation.status == "normal",
Conversation.is_deleted.is_(False),
]
if isinstance(user, Account):

View File

@ -145,7 +145,7 @@ class MessageCycleManage:
# get extension
if "." in message_file.url:
extension = f'.{message_file.url.split(".")[-1]}'
extension = f".{message_file.url.split('.')[-1]}"
if len(extension) > 10:
extension = ".bin"
else:

View File

@ -62,8 +62,9 @@ class ApiExternalDataTool(ExternalDataTool):
if not api_based_extension:
raise ValueError(
"[External data tool] API query failed, variable: {}, "
"error: api_based_extension_id is invalid".format(self.variable)
"[External data tool] API query failed, variable: {}, error: api_based_extension_id is invalid".format(
self.variable
)
)
# decrypt api_key

View File

@ -90,7 +90,7 @@ class File(BaseModel):
def markdown(self) -> str:
url = self.generate_url()
if self.type == FileType.IMAGE:
text = f'![{self.filename or ""}]({url})'
text = f"![{self.filename or ''}]({url})"
else:
text = f"[{self.filename or url}]({url})"

View File

@ -530,7 +530,6 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
chunk_size = 10
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
# create keyword index
create_keyword_thread = threading.Thread(
@ -539,11 +538,22 @@ class IndexingRunner:
)
create_keyword_thread.start()
max_workers = 10
if dataset.indexing_technique == "high_quality":
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for i in range(0, len(documents), chunk_size):
chunk_documents = documents[i : i + chunk_size]
# Distribute documents into multiple groups based on the hash values of page_content
# This is done to prevent multiple threads from processing the same document,
# thereby avoiding potential database insertion deadlocks
document_groups: list[list[Document]] = [[] for _ in range(max_workers)]
for document in documents:
hash = helper.generate_text_hash(document.page_content)
group_index = int(hash, 16) % max_workers
document_groups[group_index].append(document)
for chunk_documents in document_groups:
if len(chunk_documents) == 0:
continue
futures.append(
executor.submit(
self._process_chunk,
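
The indexing change above swaps fixed-size chunking for grouping by content hash, so documents with identical `page_content` always land in the same worker and two threads never insert the same row concurrently. The partitioning idea in isolation, with a stand-in for `helper.generate_text_hash`:

```python
import hashlib

def text_hash(text: str) -> str:
    # stand-in for helper.generate_text_hash
    return hashlib.sha256(text.encode("utf-8")).hexdigest()

def partition_by_hash(documents: list[str], workers: int) -> list[list[str]]:
    """Identical contents always map to the same group, so no two workers
    ever process the same document concurrently."""
    groups: list[list[str]] = [[] for _ in range(workers)]
    for doc in documents:
        groups[int(text_hash(doc), 16) % workers].append(doc)
    return groups

docs = ["alpha", "beta", "alpha", "gamma"]
for i, group in enumerate(partition_by_hash(docs, workers=3)):
    print(i, group)  # both "alpha" copies land in the same group
```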

View File

@ -131,7 +131,7 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = (
SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
"MAKE SURE your output is the SAME language as the Assistant's latest response. "
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)

View File

@ -1,6 +1,9 @@
import logging
from threading import Lock
from typing import Any
logger = logging.getLogger(__name__)
_tokenizer: Any = None
_lock = Lock()
@ -43,5 +46,6 @@ class GPT2Tokenizer:
base_path = abspath(__file__)
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
logger.info("Fallback to Transformers' GPT-2 tokenizer from tiktoken")
return _tokenizer
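
The module above pairs the lazily created `_tokenizer` with a module-level `Lock` so the expensive load runs only once under concurrent first use. A generic sketch of the double-checked initialization pattern such a lock typically supports (the loader here is a placeholder):

```python
from threading import Lock
from typing import Any

_tokenizer: Any = None
_lock = Lock()

def get_tokenizer() -> Any:
    global _tokenizer
    if _tokenizer is None:  # fast path once initialized
        with _lock:
            if _tokenizer is None:  # re-check after acquiring the lock
                _tokenizer = object()  # placeholder for the real loader
    return _tokenizer
```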

View File

@ -1,42 +0,0 @@
model: ernie-lite-pro-128k
label:
en_US: Ernie-Lite-Pro-128K
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
min: 0.1
max: 1.0
default: 0.8
- name: top_p
use_template: top_p
- name: min_output_tokens
label:
en_US: "Min Output Tokens"
zh_Hans: "最小输出Token数"
use_template: max_tokens
min: 2
max: 2048
help:
zh_Hans: 指定模型最小输出token数
en_US: Specifies the lower limit on the length of generated results.
- name: max_output_tokens
label:
en_US: "Max Output Tokens"
zh_Hans: "最大输出Token数"
use_template: max_tokens
min: 2
max: 2048
default: 2048
help:
zh_Hans: 指定模型最大输出token数
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty

View File

@ -87,6 +87,6 @@ class CommonValidator:
if value.lower() not in {"true", "false"}:
raise ValueError(f"Variable {credential_form_schema.variable} should be true or false")
value = True if value.lower() == "true" else False
value = value.lower() == "true"
return value

View File

@ -6,6 +6,7 @@ from pydantic import BaseModel, ValidationInfo, field_validator
class TracingProviderEnum(Enum):
LANGFUSE = "langfuse"
LANGSMITH = "langsmith"
OPIK = "opik"
class BaseTracingConfig(BaseModel):
@ -56,5 +57,36 @@ class LangSmithConfig(BaseTracingConfig):
return v
class OpikConfig(BaseTracingConfig):
"""
Model class for Opik tracing config.
"""
api_key: str | None = None
project: str | None = None
workspace: str | None = None
url: str = "https://www.comet.com/opik/api/"
@field_validator("project")
@classmethod
def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "Default Project"
return v
@field_validator("url")
@classmethod
def url_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://www.comet.com/opik/api/"
if not v.startswith(("https://", "http://")):
raise ValueError("url must start with https:// or http://")
if not v.endswith("/api/"):
raise ValueError("url should ends with /api/")
return v
OPS_FILE_PATH = "ops_trace/"
OPS_TRACE_FAILED_KEY = "FAILED_OPS_TRACE"
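
The two validators above normalize blank values to their defaults and enforce the expected URL shape. A short check of that behaviour, assuming the `OpikConfig` class defined above is importable (values are illustrative):

```python
cfg = OpikConfig(api_key="sk-placeholder", project="", workspace="default")
print(cfg.project)  # "Default Project" — blank values fall back
print(cfg.url)      # "https://www.comet.com/opik/api/" — the default host

try:
    OpikConfig(url="https://comet.example.com/opik")  # missing the /api/ suffix
except ValueError as exc:  # pydantic's ValidationError subclasses ValueError
    print(exc)
```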

View File

View File

@ -0,0 +1,469 @@
import json
import logging
import os
import uuid
from datetime import datetime, timedelta
from typing import Optional, cast
from opik import Opik, Trace
from opik.id_helpers import uuid4_to_uuid7
from core.ops.base_trace_instance import BaseTraceInstance
from core.ops.entities.config_entity import OpikConfig
from core.ops.entities.trace_entity import (
BaseTraceInfo,
DatasetRetrievalTraceInfo,
GenerateNameTraceInfo,
MessageTraceInfo,
ModerationTraceInfo,
SuggestedQuestionTraceInfo,
ToolTraceInfo,
TraceTaskName,
WorkflowTraceInfo,
)
from extensions.ext_database import db
from models.model import EndUser, MessageFile
from models.workflow import WorkflowNodeExecution
logger = logging.getLogger(__name__)
def wrap_dict(key_name, data):
"""Make sure that the input data is a dict"""
if not isinstance(data, dict):
return {key_name: data}
return data
def wrap_metadata(metadata, **kwargs):
"""Add common metatada to all Traces and Spans"""
metadata["created_from"] = "dify"
metadata.update(kwargs)
return metadata
def prepare_opik_uuid(user_datetime: Optional[datetime], user_uuid: Optional[str]):
"""Opik needs UUIDv7 while Dify uses UUIDv4 for identifier of most
messages and objects. The type-hints of BaseTraceInfo indicates that
objects start_time and message_id could be null which means we cannot map
it to a UUIDv7. Given that we have no way to identify that object
uniquely, generate a new random one UUIDv7 in that case.
"""
if user_datetime is None:
user_datetime = datetime.now()
if user_uuid is None:
user_uuid = str(uuid.uuid4())
return uuid4_to_uuid7(user_datetime, user_uuid)
class OpikDataTrace(BaseTraceInstance):
def __init__(
self,
opik_config: OpikConfig,
):
super().__init__(opik_config)
self.opik_client = Opik(
project_name=opik_config.project,
workspace=opik_config.workspace,
host=opik_config.url,
api_key=opik_config.api_key,
)
self.project = opik_config.project
self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001")
def trace(self, trace_info: BaseTraceInfo):
if isinstance(trace_info, WorkflowTraceInfo):
self.workflow_trace(trace_info)
if isinstance(trace_info, MessageTraceInfo):
self.message_trace(trace_info)
if isinstance(trace_info, ModerationTraceInfo):
self.moderation_trace(trace_info)
if isinstance(trace_info, SuggestedQuestionTraceInfo):
self.suggested_question_trace(trace_info)
if isinstance(trace_info, DatasetRetrievalTraceInfo):
self.dataset_retrieval_trace(trace_info)
if isinstance(trace_info, ToolTraceInfo):
self.tool_trace(trace_info)
if isinstance(trace_info, GenerateNameTraceInfo):
self.generate_name_trace(trace_info)
def workflow_trace(self, trace_info: WorkflowTraceInfo):
dify_trace_id = trace_info.workflow_run_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
workflow_metadata = wrap_metadata(
trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id
)
root_span_id = None
if trace_info.message_id:
dify_trace_id = trace_info.message_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
trace_data = {
"id": opik_trace_id,
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": workflow_metadata,
"input": wrap_dict("input", trace_info.workflow_run_inputs),
"output": wrap_dict("output", trace_info.workflow_run_outputs),
"tags": ["message", "workflow"],
"project_name": self.project,
}
self.add_trace(trace_data)
root_span_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_run_id)
span_data = {
"id": root_span_id,
"parent_span_id": None,
"trace_id": opik_trace_id,
"name": TraceTaskName.WORKFLOW_TRACE.value,
"input": wrap_dict("input", trace_info.workflow_run_inputs),
"output": wrap_dict("output", trace_info.workflow_run_outputs),
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": workflow_metadata,
"tags": ["workflow"],
"project_name": self.project,
}
self.add_span(span_data)
else:
trace_data = {
"id": opik_trace_id,
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": workflow_metadata,
"input": wrap_dict("input", trace_info.workflow_run_inputs),
"output": wrap_dict("output", trace_info.workflow_run_outputs),
"tags": ["workflow"],
"project_name": self.project,
}
self.add_trace(trace_data)
# through workflow_run_id get all_nodes_execution
workflow_nodes_execution_id_records = (
db.session.query(WorkflowNodeExecution.id)
.filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
.all()
)
for node_execution_id_record in workflow_nodes_execution_id_records:
node_execution = (
db.session.query(
WorkflowNodeExecution.id,
WorkflowNodeExecution.tenant_id,
WorkflowNodeExecution.app_id,
WorkflowNodeExecution.title,
WorkflowNodeExecution.node_type,
WorkflowNodeExecution.status,
WorkflowNodeExecution.inputs,
WorkflowNodeExecution.outputs,
WorkflowNodeExecution.created_at,
WorkflowNodeExecution.elapsed_time,
WorkflowNodeExecution.process_data,
WorkflowNodeExecution.execution_metadata,
)
.filter(WorkflowNodeExecution.id == node_execution_id_record.id)
.first()
)
if not node_execution:
continue
node_execution_id = node_execution.id
tenant_id = node_execution.tenant_id
app_id = node_execution.app_id
node_name = node_execution.title
node_type = node_execution.node_type
status = node_execution.status
if node_type == "llm":
inputs = (
json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {}
)
else:
inputs = json.loads(node_execution.inputs) if node_execution.inputs else {}
outputs = json.loads(node_execution.outputs) if node_execution.outputs else {}
created_at = node_execution.created_at or datetime.now()
elapsed_time = node_execution.elapsed_time
finished_at = created_at + timedelta(seconds=elapsed_time)
execution_metadata = (
json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {}
)
metadata = execution_metadata.copy()
metadata.update(
{
"workflow_run_id": trace_info.workflow_run_id,
"node_execution_id": node_execution_id,
"tenant_id": tenant_id,
"app_id": app_id,
"app_name": node_name,
"node_type": node_type,
"status": status,
}
)
process_data = json.loads(node_execution.process_data) if node_execution.process_data else {}
provider = None
model = None
total_tokens = 0
completion_tokens = 0
prompt_tokens = 0
if process_data and process_data.get("model_mode") == "chat":
run_type = "llm"
provider = process_data.get("model_provider", None)
model = process_data.get("model_name", "")
metadata.update(
{
"ls_provider": provider,
"ls_model_name": model,
}
)
try:
if outputs.get("usage"):
total_tokens = outputs["usage"].get("total_tokens", 0)
prompt_tokens = outputs["usage"].get("prompt_tokens", 0)
completion_tokens = outputs["usage"].get("completion_tokens", 0)
except Exception:
logger.error("Failed to extract usage", exc_info=True)
else:
run_type = "tool"
parent_span_id = trace_info.workflow_app_log_id or trace_info.workflow_run_id
if not total_tokens:
total_tokens = execution_metadata.get("total_tokens", 0)
span_data = {
"trace_id": opik_trace_id,
"id": prepare_opik_uuid(created_at, node_execution_id),
"parent_span_id": prepare_opik_uuid(trace_info.start_time, parent_span_id),
"name": node_type,
"type": run_type,
"start_time": created_at,
"end_time": finished_at,
"metadata": wrap_metadata(metadata),
"input": wrap_dict("input", inputs),
"output": wrap_dict("output", outputs),
"tags": ["node_execution"],
"project_name": self.project,
"usage": {
"total_tokens": total_tokens,
"completion_tokens": completion_tokens,
"prompt_tokens": prompt_tokens,
},
"model": model,
"provider": provider,
}
self.add_span(span_data)
def message_trace(self, trace_info: MessageTraceInfo):
# get message file data
file_list = cast(list[str], trace_info.file_list) or []
message_file_data: Optional[MessageFile] = trace_info.message_file_data
if message_file_data is not None:
file_url = f"{self.file_base_url}/{message_file_data.url}" if message_file_data else ""
file_list.append(file_url)
message_data = trace_info.message_data
if message_data is None:
return
metadata = trace_info.metadata
message_id = trace_info.message_id
user_id = message_data.from_account_id
metadata["user_id"] = user_id
metadata["file_list"] = file_list
if message_data.from_end_user_id:
end_user_data: Optional[EndUser] = (
db.session.query(EndUser).filter(EndUser.id == message_data.from_end_user_id).first()
)
if end_user_data is not None:
end_user_id = end_user_data.session_id
metadata["end_user_id"] = end_user_id
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, message_id),
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": wrap_metadata(metadata),
"input": trace_info.inputs,
"output": message_data.answer,
"tags": ["message", str(trace_info.conversation_mode)],
"project_name": self.project,
}
trace = self.add_trace(trace_data)
span_data = {
"trace_id": trace.id,
"name": "llm",
"type": "llm",
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": wrap_metadata(metadata),
"input": {"input": trace_info.inputs},
"output": {"output": message_data.answer},
"tags": ["llm", str(trace_info.conversation_mode)],
"usage": {
"completion_tokens": trace_info.answer_tokens,
"prompt_tokens": trace_info.message_tokens,
"total_tokens": trace_info.total_tokens,
},
"project_name": self.project,
}
self.add_span(span_data)
def moderation_trace(self, trace_info: ModerationTraceInfo):
if trace_info.message_data is None:
return
start_time = trace_info.start_time or trace_info.message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"name": TraceTaskName.MODERATION_TRACE.value,
"type": "tool",
"start_time": start_time,
"end_time": trace_info.end_time or trace_info.message_data.updated_at,
"metadata": wrap_metadata(trace_info.metadata),
"input": wrap_dict("input", trace_info.inputs),
"output": {
"action": trace_info.action,
"flagged": trace_info.flagged,
"preset_response": trace_info.preset_response,
"inputs": trace_info.inputs,
},
"tags": ["moderation"],
}
self.add_span(span_data)
def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):
message_data = trace_info.message_data
if message_data is None:
return
start_time = trace_info.start_time or message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
"type": "tool",
"start_time": start_time,
"end_time": trace_info.end_time or message_data.updated_at,
"metadata": wrap_metadata(trace_info.metadata),
"input": wrap_dict("input", trace_info.inputs),
"output": wrap_dict("output", trace_info.suggested_question),
"tags": ["suggested_question"],
}
self.add_span(span_data)
def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):
if trace_info.message_data is None:
return
start_time = trace_info.start_time or trace_info.message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
"type": "tool",
"start_time": start_time,
"end_time": trace_info.end_time or trace_info.message_data.updated_at,
"metadata": wrap_metadata(trace_info.metadata),
"input": wrap_dict("input", trace_info.inputs),
"output": {"documents": trace_info.documents},
"tags": ["dataset_retrieval"],
}
self.add_span(span_data)
def tool_trace(self, trace_info: ToolTraceInfo):
span_data = {
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"name": trace_info.tool_name,
"type": "tool",
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": wrap_metadata(trace_info.metadata),
"input": wrap_dict("input", trace_info.tool_inputs),
"output": wrap_dict("output", trace_info.tool_outputs),
"tags": ["tool", trace_info.tool_name],
}
self.add_span(span_data)
def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"name": TraceTaskName.GENERATE_NAME_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": wrap_metadata(trace_info.metadata),
"input": trace_info.inputs,
"output": trace_info.outputs,
"tags": ["generate_name"],
"project_name": self.project,
}
trace = self.add_trace(trace_data)
span_data = {
"trace_id": trace.id,
"name": TraceTaskName.GENERATE_NAME_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
"metadata": wrap_metadata(trace_info.metadata),
"input": wrap_dict("input", trace_info.inputs),
"output": wrap_dict("output", trace_info.outputs),
"tags": ["generate_name"],
}
self.add_span(span_data)
def add_trace(self, opik_trace_data: dict) -> Trace:
try:
trace = self.opik_client.trace(**opik_trace_data)
logger.debug("Opik Trace created successfully")
return trace
except Exception as e:
raise ValueError(f"Opik Failed to create trace: {str(e)}")
def add_span(self, opik_span_data: dict):
try:
self.opik_client.span(**opik_span_data)
logger.debug("Opik Span created successfully")
except Exception as e:
raise ValueError(f"Opik Failed to create span: {str(e)}")
def api_check(self):
try:
self.opik_client.auth_check()
return True
except Exception as e:
logger.info(f"Opik API check failed: {str(e)}", exc_info=True)
raise ValueError(f"Opik API check failed: {str(e)}")
def get_project_url(self):
try:
return self.opik_client.get_project_url(project_name=self.project)
except Exception as e:
logger.info(f"Opik get run url failed: {str(e)}", exc_info=True)
raise ValueError(f"Opik get run url failed: {str(e)}")

View File

@ -17,6 +17,7 @@ from core.ops.entities.config_entity import (
OPS_FILE_PATH,
LangfuseConfig,
LangSmithConfig,
OpikConfig,
TracingProviderEnum,
)
from core.ops.entities.trace_entity import (
@ -32,6 +33,7 @@ from core.ops.entities.trace_entity import (
)
from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace
from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace
from core.ops.opik_trace.opik_trace import OpikDataTrace
from core.ops.utils import get_message_data
from extensions.ext_database import db
from extensions.ext_storage import storage
@ -52,6 +54,12 @@ provider_config_map: dict[str, dict[str, Any]] = {
"other_keys": ["project", "endpoint"],
"trace_instance": LangSmithDataTrace,
},
TracingProviderEnum.OPIK.value: {
"config_class": OpikConfig,
"secret_keys": ["api_key"],
"other_keys": ["project", "url", "workspace"],
"trace_instance": OpikDataTrace,
},
}

View File

@ -23,7 +23,12 @@ from core.helper import encrypter
from core.helper.model_provider_cache import ProviderCredentialsCache, ProviderCredentialsCacheType
from core.helper.position_helper import is_filtered
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.provider_entities import CredentialFormSchema, FormType, ProviderEntity
from core.model_runtime.entities.provider_entities import (
ConfigurateMethod,
CredentialFormSchema,
FormType,
ProviderEntity,
)
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
from extensions import ext_hosting_provider
from extensions.ext_database import db
@ -839,11 +844,18 @@ class ProviderManager:
:return:
"""
# Get provider model credential secret variables
model_credential_secret_variables = self._extract_secret_variables(
provider_entity.model_credential_schema.credential_form_schemas
if provider_entity.model_credential_schema
else []
)
if ConfigurateMethod.PREDEFINED_MODEL in provider_entity.configurate_methods:
model_credential_secret_variables = self._extract_secret_variables(
provider_entity.provider_credential_schema.credential_form_schemas
if provider_entity.provider_credential_schema
else []
)
else:
model_credential_secret_variables = self._extract_secret_variables(
provider_entity.model_credential_schema.credential_form_schemas
if provider_entity.model_credential_schema
else []
)
model_settings: list[ModelSettings] = []
if not provider_model_settings:

View File

@ -258,7 +258,7 @@ class LindormVectorStore(BaseVector):
hnsw_ef_construction = kwargs.pop("hnsw_ef_construction", 500)
ivfpq_m = kwargs.pop("ivfpq_m", dimension)
nlist = kwargs.pop("nlist", 1000)
centroids_use_hnsw = kwargs.pop("centroids_use_hnsw", True if nlist >= 5000 else False)
centroids_use_hnsw = kwargs.pop("centroids_use_hnsw", nlist >= 5000)
centroids_hnsw_m = kwargs.pop("centroids_hnsw_m", 24)
centroids_hnsw_ef_construct = kwargs.pop("centroids_hnsw_ef_construct", 500)
centroids_hnsw_ef_search = kwargs.pop("centroids_hnsw_ef_search", 100)
@ -305,7 +305,7 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic
if method_name == "ivfpq":
ivfpq_m = kwargs["ivfpq_m"]
nlist = kwargs["nlist"]
centroids_use_hnsw = True if nlist > 10000 else False
centroids_use_hnsw = nlist > 10000
centroids_hnsw_m = 24
centroids_hnsw_ef_construct = 500
centroids_hnsw_ef_search = 100

View File

@ -57,6 +57,11 @@ CREATE TABLE IF NOT EXISTS {table_name} (
) using heap;
"""
SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
"""
class PGVector(BaseVector):
def __init__(self, collection_name: str, config: PGVectorConfig):
@ -205,7 +210,10 @@ class PGVector(BaseVector):
with self._get_cursor() as cur:
cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
# TODO: create index https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
# PG hnsw indexes only support 2000 dimensions or less
# ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
if dimension <= 2000:
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
redis_client.set(collection_exist_cache_key, 1, ex=3600)
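
The dimension gate above exists because pgvector's `hnsw` access method rejects vectors wider than 2000 dimensions; larger embeddings simply skip index creation and fall back to sequential scans. A sketch of the same gate with psycopg2 (the DSN and table name are placeholders):

```python
import psycopg2  # assumes a reachable Postgres with the pgvector extension

SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
"""

def ensure_index(conn, table_name: str, dimension: int) -> None:
    with conn.cursor() as cur:
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
        # hnsw supports at most 2000 dimensions; skip the index otherwise.
        if dimension <= 2000:
            cur.execute(SQL_CREATE_INDEX.format(table_name=table_name))
    conn.commit()

conn = psycopg2.connect("dbname=dify user=postgres")  # placeholder DSN
ensure_index(conn, "embedding_store", dimension=1536)
```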

View File

@ -31,7 +31,7 @@ class FirecrawlApp:
"markdown": data.get("markdown"),
}
else:
raise Exception(f'Failed to scrape URL. Error: {response_data["error"]}')
raise Exception(f"Failed to scrape URL. Error: {response_data['error']}")
elif response.status_code in {402, 409, 500}:
error_message = response.json().get("error", "Unknown error occurred")

View File

@ -358,8 +358,7 @@ class NotionExtractor(BaseExtractor):
if not data_source_binding:
raise Exception(
f"No notion data source binding found for tenant {tenant_id} "
f"and notion workspace {notion_workspace_id}"
f"No notion data source binding found for tenant {tenant_id} and notion workspace {notion_workspace_id}"
)
return cast(str, data_source_binding.access_token)

View File

@ -112,7 +112,7 @@ class QAIndexProcessor(BaseIndexProcessor):
df = pd.read_csv(file)
text_docs = []
for index, row in df.iterrows():
data = Document(page_content=row[0], metadata={"answer": row[1]})
data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]})
text_docs.append(data)
if len(text_docs) == 0:
raise ValueError("The CSV file is empty.")

View File

@ -94,9 +94,9 @@ class ApiTool(Tool):
if "api_key_header_prefix" in credentials:
api_key_header_prefix = credentials["api_key_header_prefix"]
if api_key_header_prefix == "basic" and credentials["api_key_value"]:
credentials["api_key_value"] = f'Basic {credentials["api_key_value"]}'
credentials["api_key_value"] = f"Basic {credentials['api_key_value']}"
elif api_key_header_prefix == "bearer" and credentials["api_key_value"]:
credentials["api_key_value"] = f'Bearer {credentials["api_key_value"]}'
credentials["api_key_value"] = f"Bearer {credentials['api_key_value']}"
elif api_key_header_prefix == "custom":
pass

View File

@ -39,7 +39,7 @@ class ToolFileMessageTransformer:
conversation_id=conversation_id,
)
url = f'/files/tools/{file.id}{guess_extension(file.mimetype) or ".png"}'
url = f"/files/tools/{file.id}{guess_extension(file.mimetype) or '.png'}"
yield ToolInvokeMessage(
type=ToolInvokeMessage.MessageType.IMAGE_LINK,
@ -115,4 +115,4 @@ class ToolFileMessageTransformer:
@classmethod
def get_tool_file_url(cls, tool_file_id: str, extension: Optional[str]) -> str:
return f'/files/tools/{tool_file_id}{extension or ".bin"}'
return f"/files/tools/{tool_file_id}{extension or '.bin'}"

View File

@ -5,6 +5,7 @@ from json import loads as json_loads
from json.decoder import JSONDecodeError
from typing import Optional
from flask import request
from requests import get
from yaml import YAMLError, safe_load # type: ignore
@ -29,6 +30,10 @@ class ApiBasedToolSchemaParser:
raise ToolProviderNotFoundError("No server found in the openapi yaml.")
server_url = openapi["servers"][0]["url"]
request_env = request.headers.get("X-Request-Env")
if request_env:
matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env]
server_url = matched_servers[0] if matched_servers else server_url
# list all interfaces
interfaces = []
@ -112,7 +117,7 @@ class ApiBasedToolSchemaParser:
llm_description=property.get("description", ""),
default=property.get("default", None),
placeholder=I18nObject(
en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
en_US=property.get("description", ""), zh_Hans=property.get("description", "")
),
)
@ -144,7 +149,7 @@ class ApiBasedToolSchemaParser:
if not path:
path = str(uuid.uuid4())
interface["operation"]["operationId"] = f'{path}_{interface["method"]}'
interface["operation"]["operationId"] = f"{path}_{interface['method']}"
bundles.append(
ApiToolBundle(

View File

@ -134,6 +134,10 @@ class ArrayStringSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_STRING
value: Sequence[str]
@property
def text(self) -> str:
return json.dumps(self.value)
class ArrayNumberSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_NUMBER

View File

@ -1,6 +1,7 @@
import logging
from abc import ABC, abstractmethod
from collections.abc import Generator
from typing import Optional
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine.entities.event import GraphEngineEvent, NodeRunExceptionEvent, NodeRunSucceededEvent
@ -48,25 +49,35 @@ class StreamProcessor(ABC):
# removing the node may shortcut the answer node, so this code is commented out for now;
# there is no effect on the answer node or the workflow. When we have a better solution
# we can re-enable this code. Issues: #11542 #9560 #10638 #10564
ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id)
if "answer" in ids:
continue
else:
reachable_node_ids.extend(ids)
# ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id)
# if "answer" in ids:
# continue
# else:
# reachable_node_ids.extend(ids)
# The branch_identify parameter is added to ensure that
# only nodes in the correct logical branch are included.
ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id, run_result.edge_source_handle)
reachable_node_ids.extend(ids)
else:
unreachable_first_node_ids.append(edge.target_node_id)
for node_id in unreachable_first_node_ids:
self._remove_node_ids_in_unreachable_branch(node_id, reachable_node_ids)
def _fetch_node_ids_in_reachable_branch(self, node_id: str) -> list[str]:
def _fetch_node_ids_in_reachable_branch(self, node_id: str, branch_identify: Optional[str] = None) -> list[str]:
node_ids = []
for edge in self.graph.edge_mapping.get(node_id, []):
if edge.target_node_id == self.graph.root_node_id:
continue
# Only follow edges that match the branch_identify or have no run_condition
if edge.run_condition and edge.run_condition.branch_identify:
if not branch_identify or edge.run_condition.branch_identify != branch_identify:
continue
node_ids.append(edge.target_node_id)
node_ids.extend(self._fetch_node_ids_in_reachable_branch(edge.target_node_id))
node_ids.extend(self._fetch_node_ids_in_reachable_branch(edge.target_node_id, branch_identify))
return node_ids
def _remove_node_ids_in_unreachable_branch(self, node_id: str, reachable_node_ids: list[str]) -> None:
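
The new `branch_identify` parameter prunes the walk to the branch that actually ran: edges carrying a different run-condition identifier are skipped, and the identifier is propagated through the recursion. A toy version of the filtered traversal:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Edge:
    target: str
    branch: Optional[str] = None  # run-condition branch identifier, if any

EDGES = {
    "if": [Edge("a", branch="true"), Edge("b", branch="false")],
    "a": [Edge("answer")],
    "b": [Edge("end")],
}

def reachable(node: str, branch: Optional[str] = None) -> list[str]:
    ids: list[str] = []
    for edge in EDGES.get(node, []):
        # Skip edges bound to a branch other than the one actually taken.
        if edge.branch and edge.branch != branch:
            continue
        ids.append(edge.target)
        ids.extend(reachable(edge.target, branch))
    return ids

print(reachable("if", branch="true"))  # ['a', 'answer'] — the false branch is pruned
```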

View File

@ -253,9 +253,9 @@ class Executor:
)
if executor_response.size > threshold_size:
raise ResponseSizeError(
f'{"File" if executor_response.is_file else "Text"} size is too large,'
f' max size is {threshold_size / 1024 / 1024:.2f} MB,'
f' but current size is {executor_response.readable_size}.'
f"{'File' if executor_response.is_file else 'Text'} size is too large,"
f" max size is {threshold_size / 1024 / 1024:.2f} MB,"
f" but current size is {executor_response.readable_size}."
)
return executor_response
@ -338,7 +338,7 @@ class Executor:
if self.auth.config and self.auth.config.header:
authorization_header = self.auth.config.header
if k.lower() == authorization_header.lower():
raw += f'{k}: {"*" * len(v)}\r\n'
raw += f"{k}: {'*' * len(v)}\r\n"
continue
raw += f"{k}: {v}\r\n"

View File

@ -1,4 +1,5 @@
import json
from collections.abc import Sequence
from typing import Any, cast
from core.variables import SegmentType, Variable
@ -31,7 +32,7 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
inputs = self.node_data.model_dump()
process_data: dict[str, Any] = {}
# NOTE: This node has no outputs
updated_variables: list[Variable] = []
updated_variable_selectors: list[Sequence[str]] = []
try:
for item in self.node_data.items:
@ -98,7 +99,8 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
value=item.value,
)
variable = variable.model_copy(update={"value": updated_value})
updated_variables.append(variable)
self.graph_runtime_state.variable_pool.add(variable.selector, variable)
updated_variable_selectors.append(variable.selector)
except VariableOperatorNodeError as e:
return NodeRunResult(
status=WorkflowNodeExecutionStatus.FAILED,
@ -107,9 +109,15 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
error=str(e),
)
# `updated_variable_selectors` is a list of list[str], which is not hashable;
# remove the duplicated items first.
updated_variable_selectors = list(set(map(tuple, updated_variable_selectors)))
# Update variables
for variable in updated_variables:
self.graph_runtime_state.variable_pool.add(variable.selector, variable)
for selector in updated_variable_selectors:
variable = self.graph_runtime_state.variable_pool.get(selector)
if not isinstance(variable, Variable):
raise VariableNotFoundError(variable_selector=selector)
process_data[variable.name] = variable.value
if variable.selector[0] == CONVERSATION_VARIABLE_NODE_ID:
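
The node now records selectors rather than `Variable` objects and re-reads each variable from the pool after the loop; since a selector is a `list[str]`, which is unhashable, deduplication goes through tuples. The idiom in isolation:

```python
selectors = [["conversation", "count"], ["env", "key"], ["conversation", "count"]]

# list[str] is unhashable, so cast to tuples before deduplicating with a set.
unique_selectors = list(set(map(tuple, selectors)))
print(unique_selectors)  # order is not preserved; fine when entries are looked up again
```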

View File

@ -26,7 +26,7 @@ def handle(sender, **kwargs):
tool_runtime=tool_runtime,
provider_name=tool_entity.provider_name,
provider_type=tool_entity.provider_type,
identity_id=f'WORKFLOW.{app.id}.{node_data.get("id")}',
identity_id=f"WORKFLOW.{app.id}.{node_data.get('id')}",
)
manager.delete_tool_parameters_cache()
except:

View File

@ -34,7 +34,7 @@ class OpenDALStorage(BaseStorage):
root = kwargs.get("root", "storage")
Path(root).mkdir(parents=True, exist_ok=True)
self.op = opendal.Operator(scheme=scheme, **kwargs)
self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore
logger.debug(f"opendal operator created with scheme {scheme}")
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = self.op.layer(retry_layer)

View File

@ -1,6 +1,6 @@
from flask_restful import fields # type: ignore
from libs.helper import TimestampField
from libs.helper import AvatarUrlField, TimestampField
simple_account_fields = {"id": fields.String, "name": fields.String, "email": fields.String}
@ -8,6 +8,7 @@ account_fields = {
"id": fields.String,
"name": fields.String,
"avatar": fields.String,
"avatar_url": AvatarUrlField,
"email": fields.String,
"is_password_set": fields.Boolean,
"interface_language": fields.String,
@ -22,6 +23,7 @@ account_with_role_fields = {
"id": fields.String,
"name": fields.String,
"avatar": fields.String,
"avatar_url": AvatarUrlField,
"email": fields.String,
"last_login_at": TimestampField,
"last_active_at": TimestampField,

View File

@ -43,6 +43,18 @@ class AppIconUrlField(fields.Raw):
return None
class AvatarUrlField(fields.Raw):
def output(self, key, obj):
if obj is None:
return None
from models.account import Account
if isinstance(obj, Account) and obj.avatar is not None:
return file_helpers.get_signed_file_url(obj.avatar)
return None
class TimestampField(fields.Raw):
def format(self, value) -> int:
return int(value.timestamp())

View File

@ -13,6 +13,7 @@ from typing import Any, cast
from sqlalchemy import func
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped
from configs import dify_config
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -515,7 +516,7 @@ class DocumentSegment(db.Model): # type: ignore[name-defined]
tenant_id = db.Column(StringUUID, nullable=False)
dataset_id = db.Column(StringUUID, nullable=False)
document_id = db.Column(StringUUID, nullable=False)
position = db.Column(db.Integer, nullable=False)
position: Mapped[int]
content = db.Column(db.Text, nullable=False)
answer = db.Column(db.Text, nullable=True)
word_count = db.Column(db.Integer, nullable=False)

View File

@ -1486,9 +1486,8 @@ class ApiToken(Base):
def generate_api_key(prefix, n):
while True:
result = prefix + generate_string(n)
while db.session.query(ApiToken).filter(ApiToken.token == result).count() > 0:
result = prefix + generate_string(n)
if db.session.query(ApiToken).filter(ApiToken.token == result).count() > 0:
continue
return result

api/poetry.lock (generated, 1714 lines changed)

File diff suppressed because it is too large.

View File

@ -1,9 +1,10 @@
[project]
name = "dify-api"
requires-python = ">=3.11,<3.13"
dynamic = [ "dependencies" ]
[build-system]
requires = ["poetry-core"]
requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
@ -48,6 +49,7 @@ numpy = "~1.26.4"
oci = "~2.135.1"
openai = "~1.52.0"
openpyxl = "~3.1.5"
opik = "~1.3.4"
pandas = { version = "~2.2.2", extras = ["performance", "excel"] }
pandas-stubs = "~2.2.3.241009"
psycogreen = "~1.0.2"
@ -157,4 +159,4 @@ pytest-mock = "~3.14.0"
optional = true
[tool.poetry.group.lint.dependencies]
dotenv-linter = "~0.5.0"
ruff = "~0.8.1"
ruff = "~0.9.2"

View File

@ -286,7 +286,7 @@ class AppAnnotationService:
df = pd.read_csv(file)
result = []
for index, row in df.iterrows():
content = {"question": row[0], "answer": row[1]}
content = {"question": row.iloc[0], "answer": row.iloc[1]}
result.append(content)
if len(result) == 0:
raise ValueError("The CSV file is empty.")

View File

@ -1,7 +1,7 @@
import logging
import uuid
from enum import StrEnum
from typing import Optional, cast
from typing import Optional
from urllib.parse import urlparse
from uuid import uuid4
@ -159,15 +159,6 @@ class AppDslService:
status=ImportStatus.FAILED,
error="Empty content from url",
)
try:
content = cast(bytes, content).decode("utf-8")
except UnicodeDecodeError as e:
return Import(
id=import_id,
status=ImportStatus.FAILED,
error=f"Error decoding content: {e}",
)
except Exception as e:
return Import(
id=import_id,

View File

@ -82,7 +82,7 @@ class AudioService:
from app import app
from extensions.ext_database import db
def invoke_tts(text_content: str, app_model, voice: Optional[str] = None):
def invoke_tts(text_content: str, app_model: App, voice: Optional[str] = None):
with app.app_context():
if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}:
workflow = app_model.workflow
@ -95,6 +95,8 @@ class AudioService:
voice = features_dict["text_to_speech"].get("voice") if voice is None else voice
else:
if app_model.app_model_config is None:
raise ValueError("AppModelConfig not found")
text_to_speech_dict = app_model.app_model_config.text_to_speech_dict
if not text_to_speech_dict.get("enabled"):

View File

@ -4,6 +4,7 @@ import logging
import random
import time
import uuid
from collections import Counter
from typing import Any, Optional
from flask_login import current_user # type: ignore
@ -73,7 +74,7 @@ from tasks.sync_website_document_indexing_task import sync_website_document_inde
class DatasetService:
@staticmethod
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None):
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False):
query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
if user:
@ -88,7 +89,7 @@ class DatasetService:
else:
return [], 0
else:
if user.current_role != TenantAccountRole.OWNER:
if user.current_role != TenantAccountRole.OWNER or not include_all:
# show all datasets that the user has permission to access
if permitted_dataset_ids:
query = query.filter(
@ -223,8 +224,7 @@ class DatasetService:
)
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(f"The dataset in unavailable, due to: {ex.description}")
@ -880,7 +880,7 @@ class DocumentService:
position = DocumentService.get_documents_position(dataset.id)
document_ids = []
duplicate_document_ids = []
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
for file_id in upload_file_list:
file = (
@ -922,7 +922,7 @@ class DocumentService:
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
@ -937,8 +937,8 @@ class DocumentService:
document_ids.append(document.id)
documents.append(document)
position += 1
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if not notion_info_list:
raise ValueError("No notion info list found.")
exist_page_ids = []
@ -977,7 +977,7 @@ class DocumentService:
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
@ -997,8 +997,8 @@ class DocumentService:
# delete not selected documents
if len(exist_document) > 0:
clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
website_info = knowledge_config.data_source.info_list.website_info_list
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if not website_info:
raise ValueError("No website info list found.")
urls = website_info.urls
@ -1017,7 +1017,7 @@ class DocumentService:
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
@ -1216,20 +1216,20 @@ class DocumentService:
if features.billing.enabled:
count = 0
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = (
knowledge_config.data_source.info_list.file_info_list.file_ids
if knowledge_config.data_source.info_list.file_info_list
knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
if knowledge_config.data_source.info_list.file_info_list # type: ignore
else []
)
count = len(upload_file_list)
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if notion_info_list:
for notion_info in notion_info_list:
count = count + len(notion_info.pages)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
website_info = knowledge_config.data_source.info_list.website_info_list
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if website_info:
count = len(website_info.urls)
batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
@ -1260,7 +1260,7 @@ class DocumentService:
dataset = Dataset(
tenant_id=tenant_id,
name="",
data_source_type=knowledge_config.data_source.info_list.data_source_type,
data_source_type=knowledge_config.data_source.info_list.data_source_type, # type: ignore
indexing_technique=knowledge_config.indexing_technique,
created_by=account.id,
embedding_model=knowledge_config.embedding_model,
@ -1635,8 +1635,11 @@ class SegmentService:
segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change
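# Note: keyword_changed below tracks whether the keyword set actually differs
# (order-insensitive, compared via Counter), so the vector index in the elif
# branch further down is only refreshed when something really changed.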
keyword_changed = False
if args.keywords:
segment.keywords = args.keywords
if Counter(segment.keywords) != Counter(args.keywords):
segment.keywords = args.keywords
keyword_changed = True
segment.enabled = True
segment.disabled_at = None
segment.disabled_by = None
@ -1647,13 +1650,6 @@ class SegmentService:
document.word_count = max(0, document.word_count + word_count_change)
db.session.add(document)
# update segment index task
if args.enabled:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks:
# regenerate child chunks
# get embedding model instance
@ -1686,6 +1682,14 @@ class SegmentService:
VectorService.generate_child_chunks(
segment, document, dataset, embedding_model_instance, processing_rule, True
)
elif document.doc_form in (IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX):
if args.enabled or keyword_changed:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
else:
segment_hash = helper.generate_text_hash(content)
tokens = 0

View File

@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
original_document_id: Optional[str] = None
duplicate: bool = True
indexing_technique: Literal["high_quality", "economy"]
data_source: DataSource
data_source: Optional[DataSource] = None
process_rule: Optional[ProcessRule] = None
retrieval_model: Optional[RetrievalModel] = None
doc_form: str = "text_model"

View File

@ -155,7 +155,7 @@ class ExternalDatasetService:
if custom_parameters:
for parameter in custom_parameters:
if parameter.get("required", False) and not process_parameter.get(parameter.get("name")):
raise ValueError(f'{parameter.get("name")} is required')
raise ValueError(f"{parameter.get('name')} is required")
@staticmethod
def process_external_api(

View File

@ -59,6 +59,15 @@ class OpsService:
except Exception:
new_decrypt_tracing_config.update({"project_url": "https://smith.langchain.com/"})
if tracing_provider == "opik" and (
"project_url" not in decrypt_tracing_config or not decrypt_tracing_config.get("project_url")
):
try:
project_url = OpsTraceManager.get_trace_config_project_url(decrypt_tracing_config, tracing_provider)
new_decrypt_tracing_config.update({"project_url": project_url})
except Exception:
new_decrypt_tracing_config.update({"project_url": "https://www.comet.com/opik/"})
trace_config_data.tracing_config = new_decrypt_tracing_config
return trace_config_data.to_dict()
@ -92,7 +101,7 @@ class OpsService:
if tracing_provider == "langfuse":
project_key = OpsTraceManager.get_trace_config_project_key(tracing_config, tracing_provider)
project_url = "{host}/project/{key}".format(host=tracing_config.get("host"), key=project_key)
elif tracing_provider == "langsmith":
elif tracing_provider in ("langsmith", "opik"):
project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider)
else:
project_url = None

View File

@ -5,7 +5,8 @@ import uuid
import click
from celery import shared_task # type: ignore
from sqlalchemy import func
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
@ -18,7 +19,12 @@ from services.vector_service import VectorService
@shared_task(queue="dataset")
def batch_create_segment_to_index_task(
job_id: str, content: list, dataset_id: str, document_id: str, tenant_id: str, user_id: str
job_id: str,
content: list,
dataset_id: str,
document_id: str,
tenant_id: str,
user_id: str,
):
"""
Async batch create segment to index
@ -37,25 +43,35 @@ def batch_create_segment_to_index_task(
indexing_cache_key = "segment_batch_import_{}".format(job_id)
try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("Dataset not exist.")
with Session(db.engine) as session:
dataset = session.get(Dataset, dataset_id)
if not dataset:
raise ValueError("Dataset not exist.")
dataset_document = db.session.query(Document).filter(Document.id == document_id).first()
if not dataset_document:
raise ValueError("Document not exist.")
dataset_document = session.get(Document, document_id)
if not dataset_document:
raise ValueError("Document not exist.")
if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
raise ValueError("Document is not available.")
document_segments = []
embedding_model = None
if dataset.indexing_technique == "high_quality":
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=dataset.tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
if (
not dataset_document.enabled
or dataset_document.archived
or dataset_document.indexing_status != "completed"
):
raise ValueError("Document is not available.")
document_segments = []
embedding_model = None
if dataset.indexing_technique == "high_quality":
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=dataset.tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
)
word_count_change = 0
segments_to_insert: list[str] = []
max_position_stmt = select(func.max(DocumentSegment.position)).where(
DocumentSegment.document_id == dataset_document.id
)
word_count_change = 0
if embedding_model:
@ -103,7 +119,10 @@ def batch_create_segment_to_index_task(
redis_client.setex(indexing_cache_key, 600, "completed")
end_at = time.perf_counter()
logging.info(
click.style("Segment batch created job: {} latency: {}".format(job_id, end_at - start_at), fg="green")
click.style(
"Segment batch created job: {} latency: {}".format(job_id, end_at - start_at),
fg="green",
)
)
except Exception:
logging.exception("Segments batch created index failed")

View File

@ -44,6 +44,6 @@ def test_duplicated_dependency_crossing_groups() -> None:
dependency_names = list(dependencies.keys())
all_dependency_names.extend(dependency_names)
expected_all_dependency_names = set(all_dependency_names)
assert sorted(expected_all_dependency_names) == sorted(
all_dependency_names
), "Duplicated dependencies crossing groups are found"
assert sorted(expected_all_dependency_names) == sorted(all_dependency_names), (
"Duplicated dependencies crossing groups are found"
)

View File

@ -4,7 +4,6 @@ from app_fixture import mock_user # type: ignore
def test_post_requires_login(app):
with app.test_client() as client:
with patch("flask_login.utils._get_user", mock_user):
response = client.get("/console/api/data-source/integrates")
assert response.status_code == 200
with app.test_client() as client, patch("flask_login.utils._get_user", mock_user):
response = client.get("/console/api/data-source/integrates")
assert response.status_code == 200

View File

@ -89,9 +89,9 @@ class TestOpenSearchVector:
print("Actual document ID:", hits_by_vector[0].metadata["document_id"] if hits_by_vector else "No hits")
assert len(hits_by_vector) > 0, f"Expected at least one hit, got {len(hits_by_vector)}"
assert (
hits_by_vector[0].metadata["document_id"] == self.example_doc_id
), f"Expected document ID {self.example_doc_id}, got {hits_by_vector[0].metadata['document_id']}"
assert hits_by_vector[0].metadata["document_id"] == self.example_doc_id, (
f"Expected document ID {self.example_doc_id}, got {hits_by_vector[0].metadata['document_id']}"
)
def test_get_ids_by_metadata_field(self):
mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}}

View File

@ -434,11 +434,11 @@ def test_fetch_files_with_non_existent_variable(llm_node):
# jinja2_variables=[],
# )
# # Verify the result
# assert len(prompt_messages) == len(scenario.expected_messages), f"Scenario failed: {scenario.description}"
# assert (
# prompt_messages == scenario.expected_messages
# ), f"Message content mismatch in scenario: {scenario.description}"
# # Verify the result
# assert len(prompt_messages) == len(scenario.expected_messages), f"Scenario failed: {scenario.description}"
# assert prompt_messages == scenario.expected_messages, (
# f"Message content mismatch in scenario: {scenario.description}"
# )
def test_handle_list_messages_basic(llm_node):

View File

@ -401,8 +401,7 @@ def test__convert_to_llm_node_for_workflow_advanced_completion_model(default_var
prompt_template = PromptTemplateEntity(
prompt_type=PromptTemplateEntity.PromptType.ADVANCED,
advanced_completion_prompt_template=AdvancedCompletionPromptTemplateEntity(
prompt="You are a helpful assistant named {{name}}.\n\nContext:\n{{#context#}}\n\n"
"Human: hi\nAssistant: ",
prompt="You are a helpful assistant named {{name}}.\n\nContext:\n{{#context#}}\n\nHuman: hi\nAssistant: ",
role_prefix=AdvancedCompletionPromptTemplateEntity.RolePrefixEntity(user="Human", assistant="Assistant"),
),
)

View File

@ -9,10 +9,10 @@ if ! command -v ruff &> /dev/null || ! command -v dotenv-linter &> /dev/null; th
fi
# run ruff linter
poetry run -C api ruff check --fix ./api
poetry run -C api ruff check --fix ./
# run ruff formatter
poetry run -C api ruff format ./api
poetry run -C api ruff format ./
# run dotenv-linter linter
poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example
poetry run -P api dotenv-linter ./api/.env.example ./web/.env.example

View File

@ -12,7 +12,7 @@ if [ $? -ne 0 ]; then
# update poetry.lock
# refreshing lockfile only without updating locked versions
echo "poetry.lock is outdated, refreshing without updating locked versions ..."
poetry lock -C api --no-update
poetry lock -C api
else
echo "poetry.lock is ready."
fi

View File

@ -65,6 +65,7 @@ services:
MARKETPLACE_API_URL: ${MARKETPLACE_API_URL:-https://marketplace.dify.ai}
MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai}
TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-}
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-}
# The postgres database.
db:

View File

@ -473,6 +473,7 @@ services:
MARKETPLACE_API_URL: ${MARKETPLACE_API_URL:-https://marketplace.dify.ai}
MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai}
TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-}
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-}
# The postgres database.
db:

View File

@ -34,3 +34,6 @@ NEXT_PUBLIC_CSP_WHITELIST=
NEXT_PUBLIC_GITHUB_ACCESS_TOKEN=
# The maximum number of top-k value for RAG.
NEXT_PUBLIC_TOP_K_MAX_VALUE=10
# The maximum number of tokens for segmentation
NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000

View File

@ -5,7 +5,7 @@ import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import TracingIcon from './tracing-icon'
import ProviderPanel from './provider-panel'
import type { LangFuseConfig, LangSmithConfig } from './type'
import type { LangFuseConfig, LangSmithConfig, OpikConfig } from './type'
import { TracingProvider } from './type'
import ProviderConfigModal from './provider-config-modal'
import Indicator from '@/app/components/header/indicator'
@ -25,7 +25,8 @@ export type PopupProps = {
onChooseProvider: (provider: TracingProvider) => void
langSmithConfig: LangSmithConfig | null
langFuseConfig: LangFuseConfig | null
onConfigUpdated: (provider: TracingProvider, payload: LangSmithConfig | LangFuseConfig) => void
opikConfig: OpikConfig | null
onConfigUpdated: (provider: TracingProvider, payload: LangSmithConfig | LangFuseConfig | OpikConfig) => void
onConfigRemoved: (provider: TracingProvider) => void
}
@ -38,6 +39,7 @@ const ConfigPopup: FC<PopupProps> = ({
onChooseProvider,
langSmithConfig,
langFuseConfig,
opikConfig,
onConfigUpdated,
onConfigRemoved,
}) => {
@ -61,7 +63,7 @@ const ConfigPopup: FC<PopupProps> = ({
}
}, [onChooseProvider])
const handleConfigUpdated = useCallback((payload: LangSmithConfig | LangFuseConfig) => {
const handleConfigUpdated = useCallback((payload: LangSmithConfig | LangFuseConfig | OpikConfig) => {
onConfigUpdated(currentProvider!, payload)
hideConfigModal()
}, [currentProvider, hideConfigModal, onConfigUpdated])
@ -71,8 +73,8 @@ const ConfigPopup: FC<PopupProps> = ({
hideConfigModal()
}, [currentProvider, hideConfigModal, onConfigRemoved])
const providerAllConfigured = langSmithConfig && langFuseConfig
const providerAllNotConfigured = !langSmithConfig && !langFuseConfig
const providerAllConfigured = langSmithConfig && langFuseConfig && opikConfig
const providerAllNotConfigured = !langSmithConfig && !langFuseConfig && !opikConfig
const switchContent = (
<Switch
@ -91,6 +93,7 @@ const ConfigPopup: FC<PopupProps> = ({
onConfig={handleOnConfig(TracingProvider.langSmith)}
isChosen={chosenProvider === TracingProvider.langSmith}
onChoose={handleOnChoose(TracingProvider.langSmith)}
key="langSmith-provider-panel"
/>
)
@ -103,9 +106,61 @@ const ConfigPopup: FC<PopupProps> = ({
onConfig={handleOnConfig(TracingProvider.langfuse)}
isChosen={chosenProvider === TracingProvider.langfuse}
onChoose={handleOnChoose(TracingProvider.langfuse)}
key="langfuse-provider-panel"
/>
)
const opikPanel = (
<ProviderPanel
type={TracingProvider.opik}
readOnly={readOnly}
config={opikConfig}
hasConfigured={!!opikConfig}
onConfig={handleOnConfig(TracingProvider.opik)}
isChosen={chosenProvider === TracingProvider.opik}
onChoose={handleOnChoose(TracingProvider.opik)}
key="opik-provider-panel"
/>
)
const configuredProviderPanel = () => {
const configuredPanels: JSX.Element[] = []
if (langSmithConfig)
configuredPanels.push(langSmithPanel)
if (langFuseConfig)
configuredPanels.push(langfusePanel)
if (opikConfig)
configuredPanels.push(opikPanel)
return configuredPanels
}
const moreProviderPanel = () => {
const notConfiguredPanels: JSX.Element[] = []
if (!langSmithConfig)
notConfiguredPanels.push(langSmithPanel)
if (!langFuseConfig)
notConfiguredPanels.push(langfusePanel)
if (!opikConfig)
notConfiguredPanels.push(opikPanel)
return notConfiguredPanels
}
const configuredProviderConfig = () => {
if (currentProvider === TracingProvider.langSmith)
return langSmithConfig
if (currentProvider === TracingProvider.langfuse)
return langFuseConfig
return opikConfig
}
return (
<div className='w-[420px] p-4 rounded-2xl bg-components-panel-bg border-[0.5px] border-components-panel-border shadow-xl'>
<div className='flex justify-between items-center'>
@ -146,6 +201,7 @@ const ConfigPopup: FC<PopupProps> = ({
<div className='mt-2 space-y-2'>
{langSmithPanel}
{langfusePanel}
{opikPanel}
</div>
</>
)
@ -167,7 +223,7 @@ const ConfigPopup: FC<PopupProps> = ({
<ProviderConfigModal
appId={appId}
type={currentProvider!}
payload={currentProvider === TracingProvider.langSmith ? langSmithConfig : langFuseConfig}
payload={configuredProviderConfig()}
onCancel={hideConfigModal}
onSaved={handleConfigUpdated}
onChosen={onChooseProvider}

View File

@ -3,4 +3,5 @@ import { TracingProvider } from './type'
export const docURL = {
[TracingProvider.langSmith]: 'https://docs.smith.langchain.com/',
[TracingProvider.langfuse]: 'https://docs.langfuse.com',
[TracingProvider.opik]: 'https://www.comet.com/docs/opik/tracing/integrations/dify#setup-instructions',
}

View File

@ -74,11 +74,20 @@ const Panel: FC = () => {
})
}
const inUseTracingProvider: TracingProvider | null = tracingStatus?.tracing_provider || null
const InUseProviderIcon = inUseTracingProvider === TracingProvider.langSmith ? LangsmithIcon : LangfuseIcon
const InUseProviderIcon
= inUseTracingProvider === TracingProvider.langSmith
? LangsmithIcon
: inUseTracingProvider === TracingProvider.langfuse
? LangfuseIcon
: inUseTracingProvider === TracingProvider.opik
? OpikIcon
: null
const [langSmithConfig, setLangSmithConfig] = useState<LangSmithConfig | null>(null)
const [langFuseConfig, setLangFuseConfig] = useState<LangFuseConfig | null>(null)
const hasConfiguredTracing = !!(langSmithConfig || langFuseConfig)
const [opikConfig, setOpikConfig] = useState<OpikConfig | null>(null)
const hasConfiguredTracing = !!(langSmithConfig || langFuseConfig || opikConfig)
const fetchTracingConfig = async () => {
const { tracing_config: langSmithConfig, has_not_configured: langSmithHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.langSmith })
@ -87,6 +96,9 @@ const Panel: FC = () => {
const { tracing_config: langFuseConfig, has_not_configured: langFuseHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.langfuse })
if (!langFuseHasNotConfig)
setLangFuseConfig(langFuseConfig as LangFuseConfig)
const { tracing_config: opikConfig, has_not_configured: OpikHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.opik })
if (!OpikHasNotConfig)
setOpikConfig(opikConfig as OpikConfig)
}
const handleTracingConfigUpdated = async (provider: TracingProvider) => {
@ -94,15 +106,19 @@ const Panel: FC = () => {
const { tracing_config } = await doFetchTracingConfig({ appId, provider })
if (provider === TracingProvider.langSmith)
setLangSmithConfig(tracing_config as LangSmithConfig)
else
else if (provider === TracingProvider.langfuse)
setLangFuseConfig(tracing_config as LangFuseConfig)
else if (provider === TracingProvider.opik)
setOpikConfig(tracing_config as OpikConfig)
}
const handleTracingConfigRemoved = (provider: TracingProvider) => {
if (provider === TracingProvider.langSmith)
setLangSmithConfig(null)
else
else if (provider === TracingProvider.langfuse)
setLangFuseConfig(null)
else if (provider === TracingProvider.opik)
setOpikConfig(null)
if (provider === inUseTracingProvider) {
handleTracingStatusChange({
enabled: false,
@ -201,6 +217,24 @@ const Panel: FC = () => {
</div>
</>
)}
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<ConfigButton
appId={appId}
readOnly={readOnly}
hasConfigured
className='ml-2'
enabled={enabled}
onStatusChange={handleTracingEnabledChange}
chosenProvider={inUseTracingProvider}
onChooseProvider={handleChooseProvider}
langSmithConfig={langSmithConfig}
langFuseConfig={langFuseConfig}
opikConfig={opikConfig}
onConfigUpdated={handleTracingConfigUpdated}
onConfigRemoved={handleTracingConfigRemoved}
controlShowPopup={controlShowPopup}
/>
</div>
</div>
</div>
)

View File

@ -4,7 +4,7 @@ import React, { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import Field from './field'
import type { LangFuseConfig, LangSmithConfig } from './type'
import type { LangFuseConfig, LangSmithConfig, OpikConfig } from './type'
import { TracingProvider } from './type'
import { docURL } from './config'
import {
@ -22,10 +22,10 @@ import Divider from '@/app/components/base/divider'
type Props = {
appId: string
type: TracingProvider
payload?: LangSmithConfig | LangFuseConfig | null
payload?: LangSmithConfig | LangFuseConfig | OpikConfig | null
onRemoved: () => void
onCancel: () => void
onSaved: (payload: LangSmithConfig | LangFuseConfig) => void
onSaved: (payload: LangSmithConfig | LangFuseConfig | OpikConfig) => void
onChosen: (provider: TracingProvider) => void
}
@ -43,6 +43,13 @@ const langFuseConfigTemplate = {
host: '',
}
const opikConfigTemplate = {
api_key: '',
project: '',
url: '',
workspace: '',
}
const ProviderConfigModal: FC<Props> = ({
appId,
type,
@ -56,14 +63,17 @@ const ProviderConfigModal: FC<Props> = ({
const isEdit = !!payload
const isAdd = !isEdit
const [isSaving, setIsSaving] = useState(false)
const [config, setConfig] = useState<LangSmithConfig | LangFuseConfig>((() => {
const [config, setConfig] = useState<LangSmithConfig | LangFuseConfig | OpikConfig>((() => {
if (isEdit)
return payload
if (type === TracingProvider.langSmith)
return langSmithConfigTemplate
return langFuseConfigTemplate
else if (type === TracingProvider.langfuse)
return langFuseConfigTemplate
return opikConfigTemplate
})())
const [isShowRemoveConfirm, {
setTrue: showRemoveConfirm,
@ -112,6 +122,10 @@ const ProviderConfigModal: FC<Props> = ({
errorMessage = t('common.errorMsg.fieldRequired', { field: 'Host' })
}
if (type === TracingProvider.opik) {
// All Opik fields are optional, so no required-field validation is needed here.
}
return errorMessage
}, [config, t, type])
const handleSave = useCallback(async () => {
@ -216,6 +230,38 @@ const ProviderConfigModal: FC<Props> = ({
/>
</>
)}
{type === TracingProvider.opik && (
<>
<Field
label='API Key'
labelClassName='!text-sm'
value={(config as OpikConfig).api_key}
onChange={handleConfigChange('api_key')}
placeholder={t(`${I18N_PREFIX}.placeholder`, { key: 'API Key' })!}
/>
<Field
label={t(`${I18N_PREFIX}.project`)!}
labelClassName='!text-sm'
value={(config as OpikConfig).project}
onChange={handleConfigChange('project')}
placeholder={t(`${I18N_PREFIX}.placeholder`, { key: t(`${I18N_PREFIX}.project`) })!}
/>
<Field
label='Workspace'
labelClassName='!text-sm'
value={(config as OpikConfig).workspace}
onChange={handleConfigChange('workspace')}
placeholder={'default'}
/>
<Field
label='URL'
labelClassName='!text-sm'
value={(config as OpikConfig).url}
onChange={handleConfigChange('url')}
placeholder={'https://www.comet.com/opik/api/'}
/>
</>
)}
</div>
<div className='my-8 flex justify-between items-center h-8'>

View File

@ -7,7 +7,8 @@ import {
import { useTranslation } from 'react-i18next'
import { TracingProvider } from './type'
import cn from '@/utils/classnames'
import { LangfuseIconBig, LangsmithIconBig } from '@/app/components/base/icons/src/public/tracing'
import { LangfuseIconBig, LangsmithIconBig, OpikIconBig } from '@/app/components/base/icons/src/public/tracing'
import { Settings04 } from '@/app/components/base/icons/src/vender/line/general'
import { Eye as View } from '@/app/components/base/icons/src/vender/solid/general'
const I18N_PREFIX = 'app.tracing'
@ -26,6 +27,7 @@ const getIcon = (type: TracingProvider) => {
return ({
[TracingProvider.langSmith]: LangsmithIconBig,
[TracingProvider.langfuse]: LangfuseIconBig,
[TracingProvider.opik]: OpikIconBig,
})[type]
}
@ -79,7 +81,7 @@ const ProviderPanel: FC<Props> = ({
<div className={'flex justify-between items-center space-x-1'}>
{hasConfigured && (
<div className='flex px-2 items-center h-6 bg-components-button-secondary-bg rounded-md border-[0.5px] border-components-button-secondary-border shadow-xs cursor-pointer text-text-secondary space-x-1' onClick={viewBtnClick} >
<View className='w-3 h-3'/>
<View className='w-3 h-3' />
<div className='text-xs font-medium'>{t(`${I18N_PREFIX}.view`)}</div>
</div>
)}

View File

@ -1,6 +1,7 @@
export enum TracingProvider {
langSmith = 'langsmith',
langfuse = 'langfuse',
opik = 'opik',
}
export type LangSmithConfig = {
@ -14,3 +15,10 @@ export type LangFuseConfig = {
secret_key: string
host: string
}
export type OpikConfig = {
api_key: string
project: string
workspace: string
url: string
}

View File

@ -4,7 +4,8 @@
import { useEffect, useMemo, useRef, useState } from 'react'
import { useRouter } from 'next/navigation'
import { useTranslation } from 'react-i18next'
import { useDebounceFn } from 'ahooks'
import { useBoolean, useDebounceFn } from 'ahooks'
import { useQuery } from '@tanstack/react-query'
// Components
import ExternalAPIPanel from '../../components/datasets/external-api/external-api-panel'
@ -16,7 +17,9 @@ import TabSliderNew from '@/app/components/base/tab-slider-new'
import TagManagementModal from '@/app/components/base/tag-management'
import TagFilter from '@/app/components/base/tag-management/filter'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import CheckboxWithLabel from '@/app/components/datasets/create/website/base/checkbox-with-label'
// Services
import { fetchDatasetApiBaseUrl } from '@/service/datasets'
@ -27,15 +30,13 @@ import { useStore as useTagStore } from '@/app/components/base/tag-management/st
import { useAppContext } from '@/context/app-context'
import { useExternalApiPanel } from '@/context/external-api-panel-context'
import { useQuery } from '@tanstack/react-query'
import Input from '@/app/components/base/input'
const Container = () => {
const { t } = useTranslation()
const router = useRouter()
const { currentWorkspace } = useAppContext()
const { currentWorkspace, isCurrentWorkspaceOwner } = useAppContext()
const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
const { showExternalApiPanel, setShowExternalApiPanel } = useExternalApiPanel()
const [includeAll, { toggle: toggleIncludeAll }] = useBoolean(false)
const options = useMemo(() => {
return [
@ -81,7 +82,7 @@ const Container = () => {
}, [currentWorkspace, router])
return (
<div ref={containerRef} className='grow relative flex flex-col bg-background-body overflow-y-auto'>
<div ref={containerRef} className='grow relative flex flex-col bg-background-body overflow-y-auto scroll-container'>
<div className='sticky top-0 flex justify-between pt-4 px-12 pb-2 leading-[56px] bg-background-body z-10 flex-wrap gap-y-2'>
<TabSliderNew
value={activeTab}
@ -90,6 +91,14 @@ const Container = () => {
/>
{activeTab === 'dataset' && (
<div className='flex items-center justify-center gap-2'>
{isCurrentWorkspaceOwner && <CheckboxWithLabel
isChecked={includeAll}
onChange={toggleIncludeAll}
label={t('dataset.allKnowledge')}
labelClassName='system-md-regular text-text-secondary'
className='mr-2'
tooltip={t('dataset.allKnowledgeDescription') as string}
/>}
<TagFilter type='knowledge' value={tagFilterValue} onChange={handleTagsChange} />
<Input
showLeftIcon
@ -113,7 +122,7 @@ const Container = () => {
</div>
{activeTab === 'dataset' && (
<>
<Datasets containerRef={containerRef} tags={tagIDs} keywords={searchKeywords} />
<Datasets containerRef={containerRef} tags={tagIDs} keywords={searchKeywords} includeAll={includeAll} />
<DatasetFooter />
{showTagManagementModal && (
<TagManagementModal type='knowledge' show={showTagManagementModal} />

View File

@ -6,7 +6,7 @@ import { debounce } from 'lodash-es'
import { useTranslation } from 'react-i18next'
import NewDatasetCard from './NewDatasetCard'
import DatasetCard from './DatasetCard'
import type { DataSetListResponse } from '@/models/datasets'
import type { DataSetListResponse, FetchDatasetsParams } from '@/models/datasets'
import { fetchDatasets } from '@/service/datasets'
import { useAppContext } from '@/context/app-context'
@ -15,13 +15,15 @@ const getKey = (
previousPageData: DataSetListResponse,
tags: string[],
keyword: string,
includeAll: boolean,
) => {
if (!pageIndex || previousPageData.has_more) {
const params: any = {
const params: FetchDatasetsParams = {
url: 'datasets',
params: {
page: pageIndex + 1,
limit: 30,
include_all: includeAll,
},
}
if (tags.length)
@ -37,16 +39,18 @@ type Props = {
containerRef: React.RefObject<HTMLDivElement>
tags: string[]
keywords: string
includeAll: boolean
}
const Datasets = ({
containerRef,
tags,
keywords,
includeAll,
}: Props) => {
const { isCurrentWorkspaceEditor } = useAppContext()
const { data, isLoading, setSize, mutate } = useSWRInfinite(
(pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords),
(pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords, includeAll),
fetchDatasets,
{ revalidateFirstPage: false, revalidateAll: true },
)

View File

@ -1,7 +1,9 @@
'use client'
import { type FC, useEffect } from 'react'
import { useEffect, useState } from 'react'
import { useContext } from 'use-context-selector'
import { useTranslation } from 'react-i18next'
import { RiListUnordered } from '@remixicon/react'
import TemplateEn from './template/template.en.mdx'
import TemplateZh from './template/template.zh.mdx'
import I18n from '@/context/i18n'
@ -10,25 +12,106 @@ import { LanguagesSupported } from '@/i18n/language'
type DocProps = {
apiBaseUrl: string
}
const Doc: FC<DocProps> = ({
apiBaseUrl,
}) => {
const { locale } = useContext(I18n)
const Doc = ({ apiBaseUrl }: DocProps) => {
const { locale } = useContext(I18n)
const { t } = useTranslation()
const [toc, setToc] = useState<Array<{ href: string; text: string }>>([])
const [isTocExpanded, setIsTocExpanded] = useState(false)
// Set initial TOC expanded state based on screen width
useEffect(() => {
const hash = location.hash
if (hash)
document.querySelector(hash)?.scrollIntoView()
const mediaQuery = window.matchMedia('(min-width: 1280px)')
setIsTocExpanded(mediaQuery.matches)
}, [])
// Extract TOC from article content
useEffect(() => {
const extractTOC = () => {
const article = document.querySelector('article')
if (article) {
const headings = article.querySelectorAll('h2')
const tocItems = Array.from(headings).map((heading) => {
const anchor = heading.querySelector('a')
if (anchor) {
return {
href: anchor.getAttribute('href') || '',
text: anchor.textContent || '',
}
}
return null
}).filter((item): item is { href: string; text: string } => item !== null)
setToc(tocItems)
}
}
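// Defer one tick so the MDX article is rendered before headings are queried.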
setTimeout(extractTOC, 0)
}, [locale])
// Handle TOC item click
const handleTocClick = (e: React.MouseEvent<HTMLAnchorElement>, item: { href: string; text: string }) => {
e.preventDefault()
const targetId = item.href.replace('#', '')
const element = document.getElementById(targetId)
if (element) {
const scrollContainer = document.querySelector('.scroll-container')
if (scrollContainer) {
const headerOffset = -40
const elementTop = element.offsetTop - headerOffset
scrollContainer.scrollTo({
top: elementTop,
behavior: 'smooth',
})
}
}
}
return (
<article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'>
{
locale !== LanguagesSupported[1]
<div className="flex">
<div className={`fixed right-16 top-32 z-10 transition-all ${isTocExpanded ? 'w-64' : 'w-10'}`}>
{isTocExpanded
? (
<nav className="toc w-full bg-gray-50 p-4 rounded-lg shadow-md max-h-[calc(100vh-150px)] overflow-y-auto">
<div className="flex justify-between items-center mb-4">
<h3 className="text-lg font-semibold">{t('appApi.develop.toc')}</h3>
<button
onClick={() => setIsTocExpanded(false)}
className="text-gray-500 hover:text-gray-700"
>
✕
</button>
</div>
<ul className="space-y-2">
{toc.map((item, index) => (
<li key={index}>
<a
href={item.href}
className="text-gray-600 hover:text-gray-900 hover:underline transition-colors duration-200"
onClick={e => handleTocClick(e, item)}
>
{item.text}
</a>
</li>
))}
</ul>
</nav>
)
: (
<button
onClick={() => setIsTocExpanded(true)}
className="w-10 h-10 bg-gray-50 rounded-full shadow-md flex items-center justify-center hover:bg-gray-100 transition-colors duration-200"
>
<RiListUnordered className="w-6 h-6" />
</button>
)}
</div>
<article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'>
{locale !== LanguagesSupported[1]
? <TemplateEn apiBaseUrl={apiBaseUrl} />
: <TemplateZh apiBaseUrl={apiBaseUrl} />
}
</article>
}
</article>
</div>
)
}

View File

@ -1,5 +1,5 @@
import { CodeGroup } from '@/app/components/develop/code.tsx'
import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
# Knowledge API
@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
- <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
</Property>
<PropertyInstruction>When the knowledge base has no parameters configured, the first upload should provide the following parameters; if they are omitted, the default parameters will be used.</PropertyInstruction>
<Property name='retrieval_model' type='object' key='retrieval_model'>
Retrieval model
- <code>search_method</code> (string) Search method
- <code>hybrid_search</code> Hybrid search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>reranking_enable</code> (bool) Whether to enable reranking
- <code>reranking_mode</code> (object) Rerank model configuration
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>top_k</code> (int) Number of results to return
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (float) Score threshold
</Property>
<Property name='embedding_model' type='string' key='embedding_model'>
Embedding model name
</Property>
<Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
Embedding model provider
</Property>
</Properties>
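As a rough illustration, the sketch below assembles these optional retrieval settings into a create-by-text request body. It is a minimal sketch, not an official client: the base URL, API key, dataset ID, and the embedding model/provider names are placeholder assumptions, and the exact endpoint path should be taken from the Request example on the right.

```python {{ title: 'Python (sketch)' }}
# Minimal sketch of a create-by-text call carrying explicit retrieval settings.
# All concrete values below are illustrative assumptions, not documented defaults.
import requests

API_BASE_URL = "https://api.dify.ai/v1"   # assumption: replace with your base URL
API_KEY = "your-dataset-api-key"          # assumption
DATASET_ID = "your-dataset-id"            # assumption

payload = {
    "name": "example",
    "text": "Document body goes here.",
    "indexing_technique": "high_quality",
    "process_rule": {"mode": "automatic"},
    # Optional on the first upload; server-side defaults apply when omitted.
    "retrieval_model": {
        "search_method": "hybrid_search",
        "reranking_enable": False,
        "top_k": 3,
        "score_threshold_enabled": False,
        "score_threshold": 0.5,
    },
    "embedding_model": "example-embedding-model",     # assumption
    "embedding_model_provider": "example-provider",   # assumption
}
resp = requests.post(
    f"{API_BASE_URL}/datasets/{DATASET_ID}/document/create_by_text",  # path per the Request example
    headers={"Authorization": f"Bearer {API_KEY}"},
    json=payload,
)
resp.raise_for_status()
print(resp.json())
```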
</Col>
<Col sticky>
@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='file' type='multipart/form-data' key='file'>
Files that need to be uploaded.
</Property>
<PropertyInstruction>When the knowledge base has no parameters configured, the first upload should provide the following parameters; if they are omitted, the default parameters will be used.</PropertyInstruction>
<Property name='retrieval_model' type='object' key='retrieval_model'>
Retrieval model
- <code>search_method</code> (string) Search method
- <code>hybrid_search</code> Hybrid search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>reranking_enable</code> (bool) Whether to enable reranking
- <code>reranking_mode</code> (object) Rerank model configuration
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>top_k</code> (int) Number of results to return
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (float) Score threshold
</Property>
<Property name='embedding_model' type='string' key='embedding_model'>
Embedding model name
</Property>
<Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
Embedding model provider
</Property>
</Properties>
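Analogously, a hedged sketch of the multipart upload: the JSON settings travel in the `data` form field next to the binary `file` part. Placeholder names and all field values below are assumptions; take the exact endpoint path from the Request example.

```python {{ title: 'Python (sketch)' }}
# Minimal sketch of a create-by-file upload. The JSON settings are serialized
# into the 'data' form field; the document binary goes in 'file'.
# Concrete values are illustrative assumptions, not documented defaults.
import json
import requests

API_BASE_URL = "https://api.dify.ai/v1"   # assumption: replace with your base URL
API_KEY = "your-dataset-api-key"          # assumption
DATASET_ID = "your-dataset-id"            # assumption

settings = {
    "indexing_technique": "high_quality",
    "process_rule": {"mode": "automatic"},
    "retrieval_model": {
        "search_method": "semantic_search",
        "reranking_enable": False,
        "top_k": 3,
        "score_threshold_enabled": False,
        "score_threshold": 0.5,
    },
    "embedding_model": "example-embedding-model",     # assumption
    "embedding_model_provider": "example-provider",   # assumption
}
with open("example.txt", "rb") as f:
    resp = requests.post(
        f"{API_BASE_URL}/datasets/{DATASET_ID}/document/create_by_file",  # path per the Request example
        headers={"Authorization": f"Bearer {API_KEY}"},
        files={"file": ("example.txt", f, "text/plain")},
        data={"data": json.dumps(settings)},
    )
resp.raise_for_status()
print(resp.json())
```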
</Col>
<Col sticky>
@ -1106,6 +1148,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
method='GET'
title='Get Upload File'
name='#get_upload_file'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Knowledge ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"id": "file_id",
"name": "file_name",
"size": 1024,
"extension": "txt",
"url": "preview_url",
"download_url": "download_url",
"mime_type": "text/plain",
"created_by": "user_id",
"created_at": 1728734540,
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/retrieve'
method='POST'
@ -1137,10 +1230,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Semantic search weight setting in hybrid search mode
- <code>weights</code> (float) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (double) Score threshold
- <code>score_threshold</code> (float) Score threshold
</Property>
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
Unused field

View File

@ -1,5 +1,5 @@
import { CodeGroup } from '@/app/components/develop/code.tsx'
import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
# 知识库 API
@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
- <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
</Property>
<PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
<Property name='retrieval_model' type='object' key='retrieval_model'>
检索模式
- <code>search_method</code> (string) 检索方法
- <code>hybrid_search</code> 混合检索
- <code>semantic_search</code> 语义检索
- <code>full_text_search</code> 全文检索
- <code>reranking_enable</code> (bool) 是否开启rerank
- <code>reranking_model</code> (object) Rerank 模型配置
- <code>reranking_provider_name</code> (string) Rerank 模型的提供商
- <code>reranking_model_name</code> (string) Rerank 模型的名称
- <code>top_k</code> (int) 召回条数
- <code>score_threshold_enabled</code> (bool)是否开启召回分数限制
- <code>score_threshold</code> (float) 召回分数限制
</Property>
<Property name='embedding_model' type='string' key='embedding_model'>
Embedding 模型名称
</Property>
<Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
Embedding 模型供应商
</Property>
</Properties>
</Col>
<Col sticky>
@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='file' type='multipart/form-data' key='file'>
需要上传的文件。
</Property>
<PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
<Property name='retrieval_model' type='object' key='retrieval_model'>
检索模式
- <code>search_method</code> (string) 检索方法
- <code>hybrid_search</code> 混合检索
- <code>semantic_search</code> 语义检索
- <code>full_text_search</code> 全文检索
- <code>reranking_enable</code> (bool) 是否开启rerank
- <code>reranking_model</code> (object) Rerank 模型配置
- <code>reranking_provider_name</code> (string) Rerank 模型的提供商
- <code>reranking_model_name</code> (string) Rerank 模型的名称
- <code>top_k</code> (int) 召回条数
- <code>score_threshold_enabled</code> (bool)是否开启召回分数限制
- <code>score_threshold</code> (float) 召回分数限制
</Property>
<Property name='embedding_model' type='string' key='embedding_model'>
Embedding 模型名称
</Property>
<Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
Embedding 模型供应商
</Property>
</Properties>
</Col>
<Col sticky>
@ -1107,6 +1149,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
method='GET'
title='获取上传文件'
name='#get_upload_file'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
知识库 ID
</Property>
<Property name='document_id' type='string' key='document_id'>
文档 ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"id": "file_id",
"name": "file_name",
"size": 1024,
"extension": "txt",
"url": "preview_url",
"download_url": "download_url",
"mime_type": "text/plain",
"created_by": "user_id",
"created_at": 1728734540,
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/retrieve'
method='POST'
@ -1135,13 +1228,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>full_text_search</code> 全文检索
- <code>hybrid_search</code> 混合检索
- <code>reranking_enable</code> (bool) 是否启用 Reranking非必填如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
- <code>reranking_mode</code> (object) Rerank模型配置非必填如果启用了 reranking 则传值
- <code>reranking_mode</code> (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值
- <code>reranking_provider_name</code> (string) Rerank 模型提供商
- <code>reranking_model_name</code> (string) Rerank 模型名称
- <code>weights</code> (double) 混合检索模式下语意检索的权重设置
- <code>weights</code> (float) 混合检索模式下语意检索的权重设置
- <code>top_k</code> (integer) 返回结果数量,非必填
- <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
- <code>score_threshold</code> (double) Score 阈值
- <code>score_threshold</code> (float) Score 阈值
</Property>
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
未启用字段

View File

@ -1,7 +1,6 @@
import React from 'react'
import type { FC } from 'react'
import type { Metadata } from 'next'
import GA, { GaType } from '@/app/components/base/ga'
export const metadata: Metadata = {
icons: 'data:,', // prevent browser from using default favicon
@ -12,7 +11,6 @@ const Layout: FC<{
}> = ({ children }) => {
return (
<div className="min-w-[300px] h-full pb-[env(safe-area-inset-bottom)]">
<GA gaType={GaType.webapp} />
{children}
</div>
)

View File

@ -0,0 +1,122 @@
'use client'
import type { Area } from 'react-easy-crop'
import React, { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import { RiPencilLine } from '@remixicon/react'
import { updateUserProfile } from '@/service/common'
import { ToastContext } from '@/app/components/base/toast'
import ImageInput, { type OnImageInput } from '@/app/components/base/app-icon-picker/ImageInput'
import Modal from '@/app/components/base/modal'
import Divider from '@/app/components/base/divider'
import Button from '@/app/components/base/button'
import Avatar, { type AvatarProps } from '@/app/components/base/avatar'
import { useLocalFileUploader } from '@/app/components/base/image-uploader/hooks'
import type { ImageFile } from '@/types/app'
import getCroppedImg from '@/app/components/base/app-icon-picker/utils'
import { DISABLE_UPLOAD_IMAGE_AS_ICON } from '@/config'
type InputImageInfo = { file: File } | { tempUrl: string; croppedAreaPixels: Area; fileName: string }
type AvatarWithEditProps = AvatarProps & { onSave?: () => void }
const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const [inputImageInfo, setInputImageInfo] = useState<InputImageInfo>()
const [isShowAvatarPicker, setIsShowAvatarPicker] = useState(false)
const [uploading, setUploading] = useState(false)
const handleImageInput: OnImageInput = useCallback(async (isCropped: boolean, fileOrTempUrl: string | File, croppedAreaPixels?: Area, fileName?: string) => {
setInputImageInfo(
isCropped
? { tempUrl: fileOrTempUrl as string, croppedAreaPixels: croppedAreaPixels!, fileName: fileName! }
: { file: fileOrTempUrl as File },
)
}, [setInputImageInfo])
const handleSaveAvatar = useCallback(async (uploadedFileId: string) => {
try {
await updateUserProfile({ url: 'account/avatar', body: { avatar: uploadedFileId } })
notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
setIsShowAvatarPicker(false)
onSave?.()
}
catch (e) {
notify({ type: 'error', message: (e as Error).message })
}
}, [notify, onSave, t])
const { handleLocalFileUpload } = useLocalFileUploader({
limit: 3,
disabled: false,
onUpload: (imageFile: ImageFile) => {
if (imageFile.progress === 100) {
setUploading(false)
setInputImageInfo(undefined)
handleSaveAvatar(imageFile.fileId)
}
// Error
if (imageFile.progress === -1)
setUploading(false)
},
})
const handleSelect = useCallback(async () => {
if (!inputImageInfo)
return
setUploading(true)
if ('file' in inputImageInfo) {
handleLocalFileUpload(inputImageInfo.file)
return
}
const blob = await getCroppedImg(inputImageInfo.tempUrl, inputImageInfo.croppedAreaPixels, inputImageInfo.fileName)
const file = new File([blob], inputImageInfo.fileName, { type: blob.type })
handleLocalFileUpload(file)
}, [handleLocalFileUpload, inputImageInfo])
if (DISABLE_UPLOAD_IMAGE_AS_ICON)
return <Avatar {...props} />
return (
<>
<div>
<div className="relative group">
<Avatar {...props} />
<div
onClick={() => { setIsShowAvatarPicker(true) }}
className="absolute inset-0 bg-black bg-opacity-50 rounded-full opacity-0 group-hover:opacity-100 transition-opacity cursor-pointer flex items-center justify-center"
>
<span className="text-white text-xs">
<RiPencilLine />
</span>
</div>
</div>
</div>
<Modal
closable
className="!w-[362px] !p-0"
isShow={isShowAvatarPicker}
onClose={() => setIsShowAvatarPicker(false)}
>
<ImageInput onImageInput={handleImageInput} cropShape='round' />
<Divider className='m-0' />
<div className='w-full flex items-center justify-center p-3 gap-2'>
<Button className='w-full' onClick={() => setIsShowAvatarPicker(false)}>
{t('app.iconPicker.cancel')}
</Button>
<Button variant="primary" className='w-full' disabled={uploading || !inputImageInfo} loading={uploading} onClick={handleSelect}>
{t('app.iconPicker.ok')}
</Button>
</div>
</Modal>
</>
)
}
export default AvatarWithEdit

View File

@ -5,6 +5,7 @@ import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import DeleteAccount from '../delete-account'
import s from './index.module.css'
import AvatarWithEdit from './AvatarWithEdit'
import Collapse from '@/app/components/header/account-setting/collapse'
import type { IItem } from '@/app/components/header/account-setting/collapse'
import Modal from '@/app/components/base/modal'
@ -13,7 +14,6 @@ import { updateUserProfile } from '@/service/common'
import { useAppContext } from '@/context/app-context'
import { ToastContext } from '@/app/components/base/toast'
import AppIcon from '@/app/components/base/app-icon'
import Avatar from '@/app/components/base/avatar'
import { IS_CE_EDITION } from '@/config'
import Input from '@/app/components/base/input'
@ -133,7 +133,7 @@ export default function AccountPage() {
<h4 className='title-2xl-semi-bold text-text-primary'>{t('common.account.myAccount')}</h4>
</div>
<div className='mb-8 p-6 rounded-xl flex items-center bg-gradient-to-r from-background-gradient-bg-fill-chat-bg-2 to-background-gradient-bg-fill-chat-bg-1'>
<Avatar name={userProfile.name} size={64} />
<AvatarWithEdit avatar={userProfile.avatar_url} name={userProfile.name} onSave={ mutateUserProfile } size={64} />
<div className='ml-4'>
<p className='system-xl-semibold text-text-primary'>{userProfile.name}</p>
<p className='system-xs-regular text-text-tertiary'>{userProfile.email}</p>

View File

@ -45,7 +45,7 @@ export default function AppSelector() {
${open && 'bg-components-panel-bg-blur'}
`}
>
<Avatar name={userProfile.name} size={32} />
<Avatar avatar={userProfile.avatar_url} name={userProfile.name} size={32} />
</Menu.Button>
</div>
<Transition
@ -71,7 +71,7 @@ export default function AppSelector() {
<div className='system-md-medium text-text-primary break-all'>{userProfile.name}</div>
<div className='system-xs-regular text-text-tertiary break-all'>{userProfile.email}</div>
</div>
<Avatar name={userProfile.name} size={32} />
<Avatar avatar={userProfile.avatar_url} name={userProfile.name} size={32} />
</div>
</div>
</Menu.Item>

View File

@ -12,7 +12,7 @@ import Divider from '@/app/components/base/divider'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Textarea from '@/app/components/base/textarea'
import { type DataSet } from '@/models/datasets'
import type { DataSet } from '@/models/datasets'
import { useToastContext } from '@/app/components/base/toast'
import { updateDatasetSetting } from '@/service/datasets'
import { useAppContext } from '@/context/app-context'
@ -33,7 +33,7 @@ import { ModelTypeEnum } from '@/app/components/header/account-setting/model-pro
import { fetchMembers } from '@/service/common'
import type { Member } from '@/models/common'
interface SettingsModalProps {
type SettingsModalProps = {
currentDataset: DataSet
onCancel: () => void
onSave: (newDataset: DataSet) => void

View File

@ -149,7 +149,7 @@ const ChatItem: FC<ChatItemProps> = ({
suggestedQuestions={suggestedQuestions}
onSend={doSend}
showPromptLog
questionIcon={<Avatar name={userProfile.name} size={40} />}
questionIcon={<Avatar avatar={userProfile.avatar_url} name={userProfile.name} size={40} />}
allToolIcons={allToolIcons}
hideLogModal
noSpacing

View File

@ -175,7 +175,7 @@ const DebugWithSingleModel = forwardRef<DebugWithSingleModelRefType, DebugWithSi
onRegenerate={doRegenerate}
onStopResponding={handleStop}
showPromptLog
questionIcon={<Avatar name={userProfile.name} size={40} />}
questionIcon={<Avatar avatar={userProfile.avatar_url} name={userProfile.name} size={40} />}
allToolIcons={allToolIcons}
onAnnotationEdited={handleAnnotationEdited}
onAnnotationAdded={handleAnnotationAdded}

View File

@ -39,7 +39,7 @@ const VarPanel: FC<Props> = ({
}
</div>
{!isCollapse && (
<div className='p-3 flex flex-col gap-2'>
<div className='p-3 flex flex-col gap-2 max-h-[500px] overflow-y-auto'>
{varList.map(({ label, value }, index) => (
<div key={index} className='flex py-2 system-xs-medium'>
<div className='shrink-0 w-[128px] flex text-text-accent'>

View File

@ -6,6 +6,7 @@ import type { EChartsOption } from 'echarts'
import useSWR from 'swr'
import dayjs from 'dayjs'
import { get } from 'lodash-es'
import Decimal from 'decimal.js'
import { useTranslation } from 'react-i18next'
import { formatNumber } from '@/utils/format'
import Basic from '@/app/components/app-sidebar/basic'
@@ -60,10 +61,8 @@ const CHART_TYPE_CONFIG: Record<string, IChartConfigType> = {
},
}
const sum = (arr: number[]): number => {
return arr.reduce((acr, cur) => {
return acr + cur
})
const sum = (arr: Decimal.Value[]): number => {
return Decimal.sum(...arr).toNumber()
}
const defaultPeriod = {
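
Why the Decimal rewrite: the old sum added plain numbers with an unseeded reduce, so long series of fractional values (token prices, per-message costs) accumulate binary floating-point error. Decimal.sum accumulates in decimal.js's arbitrary-precision arithmetic and converts back to a number once at the end. A quick illustration:

import Decimal from 'decimal.js'

const values = [0.1, 0.2, 0.3]

// Plain float accumulation drifts:
const naive = values.reduce((acc, cur) => acc + cur, 0)
console.log(naive) // 0.6000000000000001

// decimal.js sums exactly in decimal, then converts back once:
const exact = Decimal.sum(...values).toNumber()
console.log(exact) // 0.6

One caveat carries over: the unseeded reduce threw a TypeError on an empty array, and Decimal.sum(...[]) also throws since it needs at least one argument, so callers presumably always pass a non-empty series.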


@@ -1,12 +1,12 @@
'use client'
import type { FC } from 'react'
import React, { useEffect, useState } from 'react'
import { ChevronRightIcon } from '@heroicons/react/20/solid'
import React, { useCallback, useEffect, useState } from 'react'
import Link from 'next/link'
import { Trans, useTranslation } from 'react-i18next'
import { useContextSelector } from 'use-context-selector'
import { useContext, useContextSelector } from 'use-context-selector'
import Modal from '@/app/components/base/modal'
import Button from '@/app/components/base/button'
import Divider from '@/app/components/base/divider'
import Input from '@/app/components/base/input'
import Textarea from '@/app/components/base/textarea'
import AppIcon from '@/app/components/base/app-icon'
@@ -18,10 +18,13 @@ import { useToastContext } from '@/app/components/base/toast'
import { languages } from '@/i18n/language'
import Tooltip from '@/app/components/base/tooltip'
import AppContext, { useAppContext } from '@/context/app-context'
import { useProviderContext } from '@/context/provider-context'
import { useModalContext } from '@/context/modal-context'
import type { AppIconSelection } from '@/app/components/base/app-icon-picker'
import AppIconPicker from '@/app/components/base/app-icon-picker'
import Divider from '@/app/components/base/divider'
import I18n from '@/context/i18n'
import cn from '@/utils/classnames'
import { ChevronRightIcon } from '@heroicons/react/24/outline'
export type ISettingsModalProps = {
isChat: boolean
@@ -85,6 +88,7 @@ const SettingsModal: FC<ISettingsModalProps> = ({
chatColorTheme: chat_color_theme,
chatColorThemeInverted: chat_color_theme_inverted,
copyright,
copyrightSwitchValue: !!copyright,
privacyPolicy: privacy_policy,
customDisclaimer: custom_disclaimer,
show_workflow_steps,
@@ -94,6 +98,7 @@ const SettingsModal: FC<ISettingsModalProps> = ({
const [language, setLanguage] = useState(default_language)
const [saveLoading, setSaveLoading] = useState(false)
const { t } = useTranslation()
const { locale } = useContext(I18n)
const [showAppIconPicker, setShowAppIconPicker] = useState(false)
const [appIcon, setAppIcon] = useState<AppIconSelection>(
@@ -101,7 +106,16 @@ const SettingsModal: FC<ISettingsModalProps> = ({
? { type: 'image', url: icon_url!, fileId: icon }
: { type: 'emoji', icon, background: icon_background! },
)
const isChatBot = appInfo.mode === 'chat' || appInfo.mode === 'advanced-chat' || appInfo.mode === 'agent-chat'
const { enableBilling, plan } = useProviderContext()
const { setShowPricingModal, setShowAccountSettingModal } = useModalContext()
const isFreePlan = plan.type === 'sandbox'
const handlePlanClick = useCallback(() => {
if (isFreePlan)
setShowPricingModal()
else
setShowAccountSettingModal({ payload: 'billing' })
}, [isFreePlan, setShowAccountSettingModal, setShowPricingModal])
useEffect(() => {
setInputInfo({
@@ -110,6 +124,7 @@ const SettingsModal: FC<ISettingsModalProps> = ({
chatColorTheme: chat_color_theme,
chatColorThemeInverted: chat_color_theme_inverted,
copyright,
copyrightSwitchValue: !!copyright,
privacyPolicy: privacy_policy,
customDisclaimer: custom_disclaimer,
show_workflow_steps,
@@ -159,7 +174,11 @@ const SettingsModal: FC<ISettingsModalProps> = ({
chat_color_theme: inputInfo.chatColorTheme,
chat_color_theme_inverted: inputInfo.chatColorThemeInverted,
prompt_public: false,
copyright: inputInfo.copyright,
copyright: isFreePlan
? ''
: inputInfo.copyrightSwitchValue
? inputInfo.copyright
: '',
privacy_policy: inputInfo.privacyPolicy,
custom_disclaimer: inputInfo.customDisclaimer,
icon_type: appIcon.type,
@@ -193,8 +212,8 @@ const SettingsModal: FC<ISettingsModalProps> = ({
return (
<>
<Modal
title={t(`${prefixSettings}.title`)}
isShow={isShow}
closable={false}
onClose={onHide}
className='max-w-[520px]'
>
@@ -327,7 +346,9 @@ const SettingsModal: FC<ISettingsModalProps> = ({
<Button className='mr-2' onClick={onHide}>{t('common.operation.cancel')}</Button>
<Button variant='primary' onClick={onClickSave} loading={saveLoading}>{t('common.operation.save')}</Button>
</div>
{showAppIconPicker && <AppIconPicker
</Modal >
{showAppIconPicker && (
<AppIconPicker
onSelect={(payload) => {
setAppIcon(payload)
setShowAppIconPicker(false)
@@ -338,8 +359,8 @@ const SettingsModal: FC<ISettingsModalProps> = ({
: { type: 'emoji', icon, background: icon_background! })
setShowAppIconPicker(false)
}}
/>}
</Modal >
/>
)}
</>
)
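
Two behavioral changes in this modal are easy to miss. First, a billing gate: handlePlanClick sends sandbox (free) users to the pricing modal and paying users to the billing tab of account settings. Second, the nested ternary in the save handler means a custom copyright is persisted only for paid plans with the switch enabled. A hypothetical helper making that save-time rule explicit (the name is mine, not the codebase's):

// Mirrors the nested ternary above: custom copyright is a paid feature,
// and even paid users can toggle it off.
const resolveCopyright = (
  isFreePlan: boolean,
  copyrightSwitchValue: boolean,
  copyright: string,
): string => {
  if (isFreePlan)
    return '' // sandbox plan: always cleared on save
  return copyrightSwitchValue ? copyright : ''
}

The markup changes are consistent with this: the modal drops its title line in favor of closable={false}, and AppIconPicker is hoisted out of the Modal subtree, presumably so the picker overlay is no longer clipped by the modal's stacking context.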


@@ -306,8 +306,14 @@ const GenerationItem: FC<IGenerationItemProps> = ({
}
<div className={`flex ${contentClassName}`}>
<div className='grow w-0'>
{siteInfo && siteInfo.show_workflow_steps && workflowProcessData && (
<WorkflowProcessItem data={workflowProcessData} expand={workflowProcessData.expand} hideProcessDetail={hideProcessDetail} />
{siteInfo && workflowProcessData && (
<WorkflowProcessItem
data={workflowProcessData}
expand={workflowProcessData.expand}
hideProcessDetail={hideProcessDetail}
hideInfo={hideProcessDetail}
readonly={!siteInfo.show_workflow_steps}
/>
)}
{workflowProcessData && !isError && (
<ResultTab data={workflowProcessData} content={content} currentTab={currentTab} onCurrentTabChange={setCurrentTab} />
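
The semantics shift here: previously the workflow trace rendered only when siteInfo.show_workflow_steps was on; now it renders whenever workflow data exists, with show_workflow_steps demoted to controlling a readonly mode (and hideInfo tied to hideProcessDetail). A sketch of the prop contract as inferred from this call site alone, since the component's declaration is not part of this diff:

// Assumption: reconstructed from the usage above, not the actual source.
type WorkflowProcessData = { expand: boolean } & Record<string, unknown>

type WorkflowProcessItemProps = {
  data: WorkflowProcessData
  expand: boolean
  hideProcessDetail?: boolean // collapse node-level detail
  hideInfo?: boolean          // hide the metadata row
  readonly?: boolean          // render the steps without drill-in
}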


@@ -9,7 +9,7 @@ import {
PortalToFollowElemTrigger,
} from '@/app/components/base/portal-to-follow-elem'
import { BubbleTextMod, ChatBot, ListSparkle, Logic } from '@/app/components/base/icons/src/vender/solid/communication'
import { type AppMode } from '@/types/app'
import type { AppMode } from '@/types/app'
export type AppSelectorProps = {
value: Array<AppMode>
onChange: (value: AppSelectorProps['value']) => void


@@ -2,8 +2,7 @@
import type { ChangeEvent, FC } from 'react'
import { createRef, useEffect, useState } from 'react'
import type { Area } from 'react-easy-crop'
import Cropper from 'react-easy-crop'
import Cropper, { type Area, type CropperProps } from 'react-easy-crop'
import classNames from 'classnames'
import { ImagePlus } from '../icons/src/vender/line/images'
@@ -18,11 +17,13 @@ export type OnImageInput = {
type UploaderProps = {
className?: string
cropShape?: CropperProps['cropShape']
onImageInput?: OnImageInput
}
const ImageInput: FC<UploaderProps> = ({
className,
cropShape,
onImageInput,
}) => {
const [inputImage, setInputImage] = useState<{ file: File; url: string }>()
@@ -78,6 +79,7 @@ const ImageInput: FC<UploaderProps> = ({
crop={crop}
zoom={zoom}
aspect={1}
cropShape={cropShape}
onCropChange={setCrop}
onCropComplete={onCropComplete}
onZoomChange={setZoom}
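
The new cropShape prop is threaded through ImageInput untouched and handed to react-easy-crop's Cropper, which accepts 'rect' (the default) or 'round'. Because the prop is optional, existing call sites keep rectangular cropping. A hypothetical round-avatar call site (the import path is assumed; this diff does not name the file):

import ImageInput from '@/app/components/base/image-input' // path assumed

// Round cropping for avatars; omit cropShape for the old rectangular behavior.
const AvatarUploader = () => (
  <ImageInput cropShape='round' />
)

export default AvatarUploader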


@@ -2,9 +2,9 @@
import { useState } from 'react'
import cn from '@/utils/classnames'
type AvatarProps = {
export type AvatarProps = {
name: string
avatar?: string
avatar: string | null
size?: number
className?: string
textClassName?: string
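
This prop tightening is the keystone for the avatar_url sweep above: avatar moves from optional (avatar?: string) to required-but-nullable (avatar: string | null), and AvatarProps is now exported for reuse. Forgetting the prop used to compile silently; now every call site must pass it explicitly. Illustration of the compile-time effect:

// <Avatar name={userProfile.name} size={32} />
//   -> compile error now: property 'avatar' is missing
// <Avatar avatar={userProfile.avatar_url} name={userProfile.name} size={32} />
//   -> ok; a null avatar_url presumably falls back to the name initial, as before

Requiring string | null rather than string keeps the no-custom-avatar case representable while still forcing each caller to decide at the type level.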

Some files were not shown because too many files have changed in this diff.