mirror of https://github.com/langgenius/dify.git
Fix: rename module-level names _LANGDETECT_AVAILABLE and _PERSIAN_CHARS_RE to lowercase (_langdetect_available, _persian_chars_re) to avoid pyright constant redefinition errors; update tests accordingly
This commit is contained in:
parent
3c9907ea81
commit
8dbae53c78
|
|
@ -44,19 +44,19 @@ _PERSIAN_HEURISTIC = re.compile(
|
|||
)
|
||||
|
||||
# Precompiled regex for Persian-specific characters (including Persian ye U+06CC)
|
||||
_PERSIAN_CHARS_RE = re.compile(r"[پچژگک\u06CC]")
|
||||
_persian_chars_re = re.compile(r"[پچژگک\u06CC]")
|
||||
|
||||
# Optional langdetect import — import once at module import time to avoid repeated lookups
|
||||
_LANGDETECT_AVAILABLE = False
|
||||
_langdetect_available = False
|
||||
try:
|
||||
from langdetect import DetectorFactory, detect # type: ignore
|
||||
|
||||
DetectorFactory.seed = 0
|
||||
_LANGDETECT_AVAILABLE = True
|
||||
_langdetect_available = True
|
||||
except Exception:
|
||||
detect = None
|
||||
DetectorFactory = None
|
||||
_LANGDETECT_AVAILABLE = False
|
||||
_langdetect_available = False
|
||||
|
||||
|
||||
def _contains_persian(text: str) -> bool:
|
||||
|
|
@ -68,7 +68,7 @@ def _contains_persian(text: str) -> bool:
|
|||
text = text or ""
|
||||
|
||||
# 1) Quick check: Persian-specific letters
|
||||
if _PERSIAN_CHARS_RE.search(text):
|
||||
if _persian_chars_re.search(text):
|
||||
return True
|
||||
|
||||
# 2) Heuristic check for common Persian words (fast, precompiled)
|
||||
|
|
@ -76,7 +76,7 @@ def _contains_persian(text: str) -> bool:
|
|||
return True
|
||||
|
||||
# 3) Fallback: language detection (more expensive) — only run if langdetect is available
|
||||
if _LANGDETECT_AVAILABLE and detect is not None:
|
||||
if _langdetect_available and detect is not None:
|
||||
try:
|
||||
return detect(text) == "fa"
|
||||
except Exception as exc:
|
||||
|
|
@ -86,22 +86,6 @@ def _contains_persian(text: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
# Precompiled regex for Persian-specific characters (including Persian ye U+06CC)
|
||||
_PERSIAN_CHARS_RE = re.compile(r"[پچژگک\u06CC]")
|
||||
|
||||
# Optional langdetect import — import once at module import time to avoid repeated lookups
|
||||
_LANGDETECT_AVAILABLE = False
|
||||
try:
|
||||
from langdetect import DetectorFactory, detect # type: ignore
|
||||
|
||||
DetectorFactory.seed = 0
|
||||
_LANGDETECT_AVAILABLE = True
|
||||
except Exception:
|
||||
detect = None
|
||||
DetectorFactory = None
|
||||
_LANGDETECT_AVAILABLE = False
|
||||
|
||||
|
||||
class WorkflowServiceInterface(Protocol):
|
||||
def get_draft_workflow(self, app_model: App, workflow_id: str | None = None) -> Workflow | None:
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ def test_generate_conversation_name_persian(monkeypatch):
|
|||
|
||||
|
||||
def test_contains_persian_character_and_heuristics(monkeypatch):
|
||||
from core.llm_generator.llm_generator import _contains_persian, _PERSIAN_CHARS_RE, _PERSIAN_HEURISTIC
|
||||
from core.llm_generator.llm_generator import _contains_persian, _persian_chars_re, _PERSIAN_HEURISTIC
|
||||
|
||||
# By single Persian-specific character
|
||||
assert _contains_persian("این یک تست پ") is True
|
||||
|
|
@ -246,11 +246,11 @@ def test_contains_persian_langdetect_fallback(monkeypatch):
|
|||
import core.llm_generator.llm_generator as lg
|
||||
|
||||
# Simulate langdetect being available and detecting Persian
|
||||
monkeypatch.setattr(lg, "_LANGDETECT_AVAILABLE", True)
|
||||
monkeypatch.setattr(lg, "_langdetect_available", True)
|
||||
monkeypatch.setattr(lg, "detect", lambda text: "fa")
|
||||
|
||||
assert lg._contains_persian("short ambiguous text") is True
|
||||
|
||||
# Reset monkeypatch
|
||||
monkeypatch.setattr(lg, "_LANGDETECT_AVAILABLE", False)
|
||||
monkeypatch.setattr(lg, "_langdetect_available", False)
|
||||
monkeypatch.setattr(lg, "detect", None)
|
||||
|
|
|
|||
Loading…
Reference in New Issue