diff --git a/api/core/rag/datasource/keyword/mecab/mecab.py b/api/core/rag/datasource/keyword/mecab/mecab.py
index 660b38650f..70841c8efc 100644
--- a/api/core/rag/datasource/keyword/mecab/mecab.py
+++ b/api/core/rag/datasource/keyword/mecab/mecab.py
@@ -61,7 +61,7 @@ class MeCab(BaseKeyword):
             self._keyword_handler.pos_weights = self._config.pos_weights
             self._keyword_handler.min_score = self._config.score_threshold
         except Exception as e:
-            logger.error(f"Failed to initialize MeCab handler: {str(e)}")
+            logger.exception("Failed to initialize MeCab handler")
             raise KeywordProcessorError(f"MeCab initialization failed: {str(e)}")
 
     def create(self, texts: list[Document], **kwargs) -> BaseKeyword:
diff --git a/api/core/rag/datasource/keyword/mecab/mecab_keyword_table_handler.py b/api/core/rag/datasource/keyword/mecab/mecab_keyword_table_handler.py
index f920c0dd31..7efb57ce16 100644
--- a/api/core/rag/datasource/keyword/mecab/mecab_keyword_table_handler.py
+++ b/api/core/rag/datasource/keyword/mecab/mecab_keyword_table_handler.py
@@ -1,5 +1,6 @@
 from collections import defaultdict
-from typing import Optional, Set
+from operator import itemgetter
+from typing import Optional
 
 import MeCab
 
@@ -41,7 +42,7 @@ class MeCabKeywordTableHandler:
         }
         self.min_score = 0.3
 
-    def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> Set[str]:
+    def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]:
         """Extract keywords from Japanese text using MeCab.
 
         Args:
@@ -80,7 +81,7 @@ class MeCabKeywordTableHandler:
                 node = node.next
 
             # Get top scoring terms
-            sorted_terms = sorted(term_scores.items(), key=lambda x: x[1], reverse=True)
+            sorted_terms = sorted(term_scores.items(), key=itemgetter(1), reverse=True)
 
             # Filter by minimum score and take top N
             keywords = {term for term, score in sorted_terms if score >= self.min_score}
@@ -96,7 +97,7 @@ class MeCabKeywordTableHandler:
         except Exception as e:
             raise RuntimeError(f"Failed to extract keywords: {str(e)}")
 
-    def _expand_tokens_with_compounds(self, keywords: Set[str], text: str) -> Set[str]:
+    def _expand_tokens_with_compounds(self, keywords: set[str], text: str) -> set[str]:
         """Expand keywords with compound terms.
 
         This method looks for adjacent keywords in the original text to capture