mirror of https://github.com/langgenius/dify.git
move stopword data package from code to Dockerfile
parent 4e201ef059
commit e6f1500cfe
@@ -73,7 +73,7 @@ COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Download nltk data
-RUN mkdir -p /usr/local/share/nltk_data && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')" \
+RUN mkdir -p /usr/local/share/nltk_data && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords')" \
     && chmod -R 755 /usr/local/share/nltk_data
 
 ENV TIKTOKEN_CACHE_DIR=/app/api/.tiktoken_cache
@@ -95,7 +95,7 @@ RUN groupadd -r -g 1001 dify && \
 
 ARG COMMIT_SHA
 ENV COMMIT_SHA=${COMMIT_SHA}
-
+ENV NLTK_DATA=/usr/local/share/nltk_data
 USER 1001
 
 ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
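Taken together, the two Dockerfile hunks download the NLTK data packages at build time and expose them to the runtime through NLTK_DATA. Below is a minimal startup check — a sketch that is not part of this commit — assuming the image path shown in the diff; it relies only on nltk.data.find and on NLTK reading NLTK_DATA at import time.

    # Sketch only, not part of the commit: verify the data baked into the image.
    import os

    # NLTK reads NLTK_DATA once, at import time, so set it before importing nltk.
    os.environ.setdefault("NLTK_DATA", "/usr/local/share/nltk_data")

    import nltk

    for resource in ("tokenizers/punkt",
                     "taggers/averaged_perceptron_tagger",
                     "corpora/stopwords"):
        nltk.data.find(resource)  # raises LookupError if a package is missing
    print("all NLTK data packages found")

With the data guaranteed by the image, the application no longer needs to download anything at request time, which is what the next hunk enforces.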
@@ -302,8 +302,7 @@ class OracleVector(BaseVector):
             nltk.data.find("tokenizers/punkt")
             nltk.data.find("corpora/stopwords")
         except LookupError:
-            nltk.download("punkt")
-            nltk.download("stopwords")
+            raise LookupError("Unable to find the required NLTK data package: punkt and stopwords")
         e_str = re.sub(r"[^\w ]", "", query)
         all_tokens = nltk.word_tokenize(e_str)
         stop_words = stopwords.words("english")
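The context lines show where the data is consumed: the English branch of OracleVector's full-text search strips punctuation, tokenizes with punkt, and drops stopwords. A rough reconstruction of that flow, with a hypothetical wrapper function (extract_keywords is illustrative, not from the source) around the variable names from the diff:

    import re

    import nltk
    from nltk.corpus import stopwords

    # Hypothetical wrapper; e_str / all_tokens / stop_words mirror the diff.
    def extract_keywords(query: str) -> list[str]:
        e_str = re.sub(r"[^\w ]", "", query)     # strip punctuation
        all_tokens = nltk.word_tokenize(e_str)   # needs tokenizers/punkt
        stop_words = stopwords.words("english")  # needs corpora/stopwords
        return [t for t in all_tokens if t.lower() not in stop_words]

    print(extract_keywords("What is the capital of France?"))
    # ['capital', 'France']

After this commit, a missing package surfaces immediately as a LookupError instead of triggering a download inside the request path.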