From 34f3b288a720687020b3d1da59f7897a936fb407 Mon Sep 17 00:00:00 2001
From: Lework
Date: Mon, 5 Jan 2026 15:50:33 +0800
Subject: [PATCH] chore(docker): update nltk data download process to include unstructured download_nltk_packages (#28876)

---
 api/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/Dockerfile b/api/Dockerfile
index 02df91bfc1..e800e60322 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -79,7 +79,8 @@ COPY --from=packages --chown=dify:dify ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Download nltk data
-RUN mkdir -p /usr/local/share/nltk_data && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords')" \
+RUN mkdir -p /usr/local/share/nltk_data \
+    && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; from unstructured.nlp.tokenize import download_nltk_packages; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords'); download_nltk_packages()" \
     && chmod -R 755 /usr/local/share/nltk_data
 
 ENV TIKTOKEN_CACHE_DIR=/app/api/.tiktoken_cache