Fix: rename module-level names to avoid pyright constant redefinition errors (_langdetect_available, _persian_chars_re); remove the duplicated definitions of both from llm_generator.py; update tests

2026-03-17 23:51:24 +08:00 · 2025-12-22 15:12:52 +01:00 · 2025-12-22 15:12:52 +01:00 · 8dbae53c78
commit 8dbae53c78
parent 3c9907ea81
2 changed files with 9 additions and 25 deletions
--- a/api/core/llm_generator/llm_generator.py
+++ b/api/core/llm_generator/llm_generator.py
@@ -44,19 +44,19 @@ _PERSIAN_HEURISTIC = re.compile(
 )

 # Precompiled regex for Persian-specific characters (including Persian ye U+06CC)
-_PERSIAN_CHARS_RE = re.compile(r"[پچژگک\u06CC]")
+_persian_chars_re = re.compile(r"[پچژگک\u06CC]")

 # Optional langdetect import — import once at module import time to avoid repeated lookups
-_LANGDETECT_AVAILABLE = False
+_langdetect_available = False
 try:
    from langdetect import DetectorFactory, detect  # type: ignore

    DetectorFactory.seed = 0
-    _LANGDETECT_AVAILABLE = True
+    _langdetect_available = True
 except Exception:
    detect = None
    DetectorFactory = None
-    _LANGDETECT_AVAILABLE = False
+    _langdetect_available = False


 def _contains_persian(text: str) -> bool:
@@ -68,7 +68,7 @@ def _contains_persian(text: str) -> bool:
    text = text or ""

    # 1) Quick check: Persian-specific letters
-    if _PERSIAN_CHARS_RE.search(text):
+    if _persian_chars_re.search(text):
        return True

    # 2) Heuristic check for common Persian words (fast, precompiled)
@@ -76,7 +76,7 @@ def _contains_persian(text: str) -> bool:
        return True

    # 3) Fallback: language detection (more expensive) — only run if langdetect is available
-    if _LANGDETECT_AVAILABLE and detect is not None:
+    if _langdetect_available and detect is not None:
        try:
            return detect(text) == "fa"
        except Exception as exc:
@@ -86,22 +86,6 @@ def _contains_persian(text: str) -> bool:
    return False


-# Precompiled regex for Persian-specific characters (including Persian ye U+06CC)
-_PERSIAN_CHARS_RE = re.compile(r"[پچژگک\u06CC]")
-
-# Optional langdetect import — import once at module import time to avoid repeated lookups
-_LANGDETECT_AVAILABLE = False
-try:
-    from langdetect import DetectorFactory, detect  # type: ignore
-
-    DetectorFactory.seed = 0
-    _LANGDETECT_AVAILABLE = True
-except Exception:
-    detect = None
-    DetectorFactory = None
-    _LANGDETECT_AVAILABLE = False
-
-
 class WorkflowServiceInterface(Protocol):
    def get_draft_workflow(self, app_model: App, workflow_id: str | None = None) -> Workflow | None:
        pass
--- a/tests/unit_tests/core/llm_generator/test_llm_generator_persian.py
+++ b/tests/unit_tests/core/llm_generator/test_llm_generator_persian.py
@@ -233,7 +233,7 @@ def test_generate_conversation_name_persian(monkeypatch):


 def test_contains_persian_character_and_heuristics(monkeypatch):
-    from core.llm_generator.llm_generator import _contains_persian, _PERSIAN_CHARS_RE, _PERSIAN_HEURISTIC
+    from core.llm_generator.llm_generator import _contains_persian, _persian_chars_re, _PERSIAN_HEURISTIC

    # By single Persian-specific character
    assert _contains_persian("این یک تست پ") is True
@@ -246,11 +246,11 @@ def test_contains_persian_langdetect_fallback(monkeypatch):
    import core.llm_generator.llm_generator as lg

    # Simulate langdetect being available and detecting Persian
-    monkeypatch.setattr(lg, "_LANGDETECT_AVAILABLE", True)
+    monkeypatch.setattr(lg, "_langdetect_available", True)
    monkeypatch.setattr(lg, "detect", lambda text: "fa")

    assert lg._contains_persian("short ambiguous text") is True

    # Reset monkeypatch
-    monkeypatch.setattr(lg, "_LANGDETECT_AVAILABLE", False)
+    monkeypatch.setattr(lg, "_langdetect_available", False)
    monkeypatch.setattr(lg, "detect", None)