diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index af7995f3bd..1e44d89e2f 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -435,7 +435,19 @@ You should edit the prompt according to the IDEAL OUTPUT.""" INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}.""" -DEFAULT_GENERATOR_SUMMARY_PROMPT = """ -You are a helpful assistant that summarizes long pieces of text into concise summaries. -Given the following text, generate a brief summary that captures the main points and key information. -The summary should be clear, concise, and written in complete sentences. """ +DEFAULT_GENERATOR_SUMMARY_PROMPT = ( + """Summarize the following content. Extract only the key information and main points. """ + """Remove redundant details. + +Requirements: +1. Write a concise summary in plain text +2. Use the same language as the input content +3. Focus on important facts, concepts, and details +4. If images are included, describe their key information +5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions" +6. Write directly without extra words + +Output only the summary text. Start summarizing now: + +""" +) \ No newline at end of file diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 226efcc6d8..1b710783fe 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -965,6 +965,9 @@ class DatasetService: else: raise ValueError("Invalid index method") dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump() + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting session.add(dataset) else: if dataset.chunk_structure and dataset.chunk_structure != knowledge_configuration.chunk_structure: @@ -1070,6 +1073,9 @@ class DatasetService: if dataset.keyword_number != knowledge_configuration.keyword_number: dataset.keyword_number = knowledge_configuration.keyword_number dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump() + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting session.add(dataset) session.commit() if action: diff --git a/api/services/entities/knowledge_entities/rag_pipeline_entities.py b/api/services/entities/knowledge_entities/rag_pipeline_entities.py index cbb0efcc2a..defd3cdd71 100644 --- a/api/services/entities/knowledge_entities/rag_pipeline_entities.py +++ b/api/services/entities/knowledge_entities/rag_pipeline_entities.py @@ -116,6 +116,7 @@ class KnowledgeConfiguration(BaseModel): embedding_model: str = "" keyword_number: int | None = 10 retrieval_model: RetrievalSetting + summary_index_setting: dict | None = None @field_validator("embedding_model_provider", mode="before") @classmethod