From ed93954adde8abf4fa7ecc89ce3d3edafe93b689 Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 9 Jan 2025 14:42:01 +0800 Subject: [PATCH] feat: support config max chunk length by env in frontend --- docker/docker-compose.yaml | 1 + web/.env.example | 3 +++ web/app/components/datasets/create/step-two/inputs.tsx | 6 ++++-- web/app/layout.tsx | 1 + web/docker/entrypoint.sh | 1 + 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 173a88bc4c..4692037b44 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -448,6 +448,7 @@ services: TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} CSP_WHITELIST: ${CSP_WHITELIST:-} TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-} + INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-} # The postgres database. db: diff --git a/web/.env.example b/web/.env.example index 2decef02fa..e2117ddfd8 100644 --- a/web/.env.example +++ b/web/.env.example @@ -28,3 +28,6 @@ NEXT_PUBLIC_CSP_WHITELIST= # The maximum number of top-k value for RAG. NEXT_PUBLIC_TOP_K_MAX_VALUE=10 + +# The maximum number of tokens for segmentation +NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000 diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx index 4231f6242d..acd4e3ae43 100644 --- a/web/app/components/datasets/create/step-two/inputs.tsx +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -39,6 +39,8 @@ export const DelimiterInput: FC = (props) => } export const MaxLengthInput: FC = (props) => { + const maxValue = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) + const { t } = useTranslation() return {t('datasetCreation.stepTwo.maxLength')} @@ -46,8 +48,8 @@ export const MaxLengthInput: FC = (props) => { diff --git a/web/app/layout.tsx b/web/app/layout.tsx index b52c904561..da659e6467 100644 --- a/web/app/layout.tsx +++ b/web/app/layout.tsx @@ -45,6 +45,7 @@ const LocaleLayout = ({ data-public-site-about={process.env.NEXT_PUBLIC_SITE_ABOUT} data-public-text-generation-timeout-ms={process.env.NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS} data-public-top-k-max-value={process.env.NEXT_PUBLIC_TOP_K_MAX_VALUE} + data-public-indexing-max-segmentation-tokens-length={process.env.NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} > diff --git a/web/docker/entrypoint.sh b/web/docker/entrypoint.sh index 8c6de0eb79..bad95b6cbe 100755 --- a/web/docker/entrypoint.sh +++ b/web/docker/entrypoint.sh @@ -24,5 +24,6 @@ export NEXT_TELEMETRY_DISABLED=${NEXT_TELEMETRY_DISABLED} export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS} export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST} export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} +export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} pm2 start ./pm2.json --no-daemon