diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py
index ec2b7f2d44..cf4d3b7db2 100644
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -11,6 +11,8 @@ Automatically identify the language of the user’s input (e.g. English, Chinese
 - The title must be natural, friendly, and in the same language as the input.
 - If the input is a direct question to the model, you may add an emoji at the end.
+- Special Note for Persian (Farsi): If the input is Persian (Farsi), ALWAYS generate the title in Persian (Farsi). Use Persian characters (for example: پ، چ، ژ، گ، ک، ی) and ensure the "Language Type" field is "Persian" or "Farsi". Do NOT use Arabic or any other language or script when the input is Persian.
+
 
 3. Output Format
 Return **only** a valid JSON object with these exact keys and no additional text:
 {
diff --git a/api/tests/unit_tests/core/test_llm_generator_persian.py b/api/tests/unit_tests/core/test_llm_generator_persian.py
new file mode 100644
index 0000000000..3e62140871
--- /dev/null
+++ b/api/tests/unit_tests/core/test_llm_generator_persian.py
@@ -0,0 +1,65 @@
+import json
+from unittest.mock import MagicMock, patch
+
+from core.llm_generator.llm_generator import LLMGenerator
+
+
+class DummyMessage:
+    def __init__(self, content):
+        self.content = content
+
+
+class DummyResponse:
+    def __init__(self, content):
+        self.message = DummyMessage(content)
+
+
+def make_json_response(language, output):
+    return json.dumps({"Language Type": language, "Your Reasoning": "...", "Your Output": output})
+
+
+@patch("core.llm_generator.llm_generator.ModelManager.get_default_model_instance")
+def test_generate_conversation_name_enforces_persian(mock_get_model):
+    # A Persian input containing Persian-specific character 'پ'
+    persian_query = "سلام، چطوری؟ پ"  # contains 'پ'
+
+    # First model response: misdetected as Arabic and returns Arabic title
+    first_resp = DummyResponse(make_json_response("Arabic", "مرحبا"))
+    # Second response (after retry): returns a Persian title with Persian-specific chars
+    second_resp = DummyResponse(make_json_response("Persian", "عنوان پِرس"))
+
+    model_instance = MagicMock()
+    model_instance.invoke_llm.side_effect = [first_resp, second_resp]
+
+    mock_get_model.return_value = model_instance
+
+    name = LLMGenerator.generate_conversation_name("tenant1", persian_query)
+
+    # The final name should come from the Persian response (contains Persian-specific char 'پ')
+    assert "پ" in name
+    # Ensure the model was invoked at least twice (retry occurred)
+    assert model_instance.invoke_llm.call_count >= 2
+
+
+@patch("core.llm_generator.llm_generator.ModelManager.get_default_model_instance")
+def test_generate_conversation_name_translation_fallback(mock_get_model):
+    # Persian query
+    persian_query = "این یک تست است پ"
+
+    # Model returns non-Persian outputs consistently
+    non_persian_resp = DummyResponse(make_json_response("Arabic", "مرحبا"))
+
+    # Translate response (last call) returns Persian translation
+    translate_resp = DummyResponse("عنوان ترجمه شده پ")
+
+    model_instance = MagicMock()
+    # First two calls return non-Persian results; third call is translation
+    model_instance.invoke_llm.side_effect = [non_persian_resp, non_persian_resp, translate_resp]
+
+    mock_get_model.return_value = model_instance
+
+    name = LLMGenerator.generate_conversation_name("tenant1", persian_query)
+
+    # Final name should contain Persian character 'پ' from translation fallback
+    assert "پ" in name
+    assert model_instance.invoke_llm.call_count >= 3
diff --git a/api/uv.lock b/api/uv.lock
index b6a554ec4d..682f186a4a 100644
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -1372,6 +1372,7 @@ dependencies = [
     { name = "jieba" },
     { name = "json-repair" },
     { name = "jsonschema" },
+    { name = "langdetect" },
     { name = "langfuse" },
     { name = "langsmith" },
     { name = "litellm" },
@@ -1568,6 +1569,7 @@ requires-dist = [
     { name = "jieba", specifier = "==0.42.1" },
     { name = "json-repair", specifier = ">=0.41.1" },
     { name = "jsonschema", specifier = ">=4.25.1" },
+    { name = "langdetect", specifier = "~=1.0.9" },
     { name = "langfuse", specifier = "~=2.51.3" },
     { name = "langsmith", specifier = "~=0.1.77" },
     { name = "litellm", specifier = "==1.77.1" },
diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml
index f446e385b3..3a06fa16c0 100644
--- a/docker/docker-compose.middleware.yaml
+++ b/docker/docker-compose.middleware.yaml
@@ -176,6 +176,12 @@ services:
       THIRD_PARTY_SIGNATURE_VERIFICATION_ENABLED: true
       THIRD_PARTY_SIGNATURE_VERIFICATION_PUBLIC_KEYS: /app/keys/publickey.pem
       FORCE_VERIFYING_SIGNATURE: false
+
+      HTTP_PROXY: ${HTTP_PROXY:-http://ssrf_proxy:3128}
+      HTTPS_PROXY: ${HTTPS_PROXY:-http://ssrf_proxy:3128}
+      PLUGIN_PYTHON_ENV_INIT_TIMEOUT: ${PLUGIN_PYTHON_ENV_INIT_TIMEOUT:-120}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
     ports:
       - "${EXPOSE_PLUGIN_DAEMON_PORT:-5002}:${PLUGIN_DAEMON_PORT:-5002}"
       - "${EXPOSE_PLUGIN_DEBUGGING_PORT:-5003}:${PLUGIN_DEBUGGING_PORT:-5003}"