Merge remote-tracking branch 'origin/main' into feat/trigger-saas

2025-11-13 15:58:43 +08:00 · 2025-11-13 15:58:43 +08:00 · 86bc2924f3
parent 318b9d707b a39b50adbb
commit 86bc2924f3
4 changed files with 104 additions and 3 deletions
--- a/api/core/rag/extractor/word_extractor.py
+++ b/api/core/rag/extractor/word_extractor.py
@ -152,13 +152,15 @@ class WordExtractor(BaseExtractor):
        # Initialize a row, all of which are empty by default
        row_cells = [""] * total_cols
        col_index = 0
-        for cell in row.cells:
+        while col_index < len(row.cells):
            # make sure the col_index is not out of range
-            while col_index < total_cols and row_cells[col_index] != "":
+            while col_index < len(row.cells) and row_cells[col_index] != "":
                col_index += 1
            # if col_index is out of range the loop is jumped
-            if col_index >= total_cols:
+            if col_index >= len(row.cells):
                break
+            # get the correct cell
+            cell = row.cells[col_index]
            cell_content = self._parse_cell(cell, image_map).strip()
            cell_colspan = cell.grid_span or 1
            for i in range(cell_colspan):
--- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py
+++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py
@ -0,0 +1,49 @@
+"""Primarily used for testing merged cell scenarios"""
+
+from docx import Document
+
+from core.rag.extractor.word_extractor import WordExtractor
+
+
+def _generate_table_with_merged_cells():
+    """
+    Return a 3x3 docx table with merged cells plus the rows expected from parsing it.
+
+    The table looks like this:
+    +-----+-----+-----+
+    | 1-1 & 1-2 | 1-3 |
+    +-----+-----+-----+
+    | 2-1 | 2-2 | 2-3 |
+    |  &  |-----+-----+
+    | 3-1 | 3-2 | 3-3 |
+    +-----+-----+-----+
+    """
+    doc = Document()
+    table = doc.add_table(rows=3, cols=3)
+    table.style = "Table Grid"
+
+    # Label every cell "<row>-<col>" (1-based) before any merging happens.
+    for r in range(3):
+        for c in range(3):
+            table.cell(r, c).text = f"{r + 1}-{c + 1}"
+
+    # Horizontal merge: first two cells of the top row.
+    top_merged = table.cell(0, 0).merge(table.cell(0, 1))
+    top_merged.text = "1-1 & 1-2"
+
+    # Vertical merge: first column of the middle and bottom rows.
+    left_merged = table.cell(1, 0).merge(table.cell(2, 0))
+    left_merged.text = "2-1 & 3-1"
+
+    return doc.tables[0], [
+        ["1-1 & 1-2", "", "1-3"],
+        ["2-1 & 3-1", "2-2", "2-3"],
+        ["2-1 & 3-1", "3-2", "3-3"],
+    ]
+
+
+def test_parse_row():
+    table, expected_rows = _generate_table_with_merged_cells()
+    parser = object.__new__(WordExtractor)  # bypass __init__; only _parse_row is exercised
+    for row_no, row in enumerate(table.rows):
+        assert parser._parse_row(row, {}, 3) == expected_rows[row_no]
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_database_utils.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_database_utils.py
@ -0,0 +1,46 @@
+"""
+Utilities for detecting if database service is available for workflow tests.
+"""
+
+import psycopg2
+import pytest
+
+from configs import dify_config
+
+
+def is_database_available() -> bool:
+    """
+    Check if the database service is available by attempting to connect to it.
+
+    Returns:
+        True if a connection could be opened, False if psycopg2 raised an error.
+    """
+    try:
+        conn = psycopg2.connect(
+            host=dify_config.DB_HOST,
+            port=dify_config.DB_PORT,
+            database=dify_config.DB_DATABASE,
+            user=dify_config.DB_USERNAME,
+            password=dify_config.DB_PASSWORD,
+            connect_timeout=2,  # seconds; fail fast when the server is unreachable
+        )
+    except psycopg2.Error:  # base class, so OperationalError is covered too
+        return False
+    # Exiting psycopg2's `with conn:` only ends the transaction; close explicitly.
+    conn.close()
+    return True
+
+
+def skip_if_database_unavailable():
+    """
+    Build a pytest skip marker for tests that need the database service.
+
+    The availability probe runs when this function is called, not when the test runs.
+
+    Usage:
+        @skip_if_database_unavailable()
+        def test_my_workflow(): ...
+    """
+    database_down = not is_database_available()
+    reason = "Database service is not available (connection refused or authentication failed)"
+    return pytest.mark.skipif(database_down, reason=reason)
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_iteration_flatten_output.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_iteration_flatten_output.py
@ -6,9 +6,11 @@ This module tests the iteration node's ability to:
 2. Preserve nested array structure when flatten_output=False
 """

+from .test_database_utils import skip_if_database_unavailable
 from .test_table_runner import TableTestRunner, WorkflowTestCase


+@skip_if_database_unavailable()
 def test_iteration_with_flatten_output_enabled():
    """
    Test iteration node with flatten_output=True (default behavior).
@ -37,6 +39,7 @@ def test_iteration_with_flatten_output_enabled():
    )


+@skip_if_database_unavailable()
 def test_iteration_with_flatten_output_disabled():
    """
    Test iteration node with flatten_output=False.
@ -65,6 +68,7 @@ def test_iteration_with_flatten_output_disabled():
    )


+@skip_if_database_unavailable()
 def test_iteration_flatten_output_comparison():
    """
    Run both flatten_output configurations in parallel to verify the difference.