Merge remote-tracking branch 'origin/main' into feat/trigger-saas

This commit is contained in:
lyzno1 2025-11-13 15:58:43 +08:00
commit 86bc2924f3
No known key found for this signature in database
4 changed files with 104 additions and 3 deletions

View File

@ -152,13 +152,15 @@ class WordExtractor(BaseExtractor):
# Initialize a row, all of which are empty by default
row_cells = [""] * total_cols
col_index = 0
for cell in row.cells:
while col_index < len(row.cells):
# make sure the col_index is not out of range
while col_index < total_cols and row_cells[col_index] != "":
while col_index < len(row.cells) and row_cells[col_index] != "":
col_index += 1
# if col_index is out of range, break out of the loop
if col_index >= total_cols:
if col_index >= len(row.cells):
break
# get the correct cell
cell = row.cells[col_index]
cell_content = self._parse_cell(cell, image_map).strip()
cell_colspan = cell.grid_span or 1
for i in range(cell_colspan):

View File

@ -0,0 +1,49 @@
"""Primarily used for testing merged cell scenarios"""
from docx import Document
from core.rag.extractor.word_extractor import WordExtractor
def _generate_table_with_merged_cells():
    """Build a 3x3 Word table containing one horizontal and one vertical merge.

    The table looks like this:

        +-----+-----+-----+
        | 1-1 & 1-2 | 1-3 |
        +-----+-----+-----+
        | 2-1 | 2-2 | 2-3 |
        |  &  |-----+-----+
        | 3-1 | 3-2 | 3-3 |
        +-----+-----+-----+

    Returns:
        A ``(table, ground_truth)`` tuple: the first table of the generated
        document and the list of row values each table row is expected to
        parse into.
    """
    doc = Document()
    table = doc.add_table(rows=3, cols=3)
    table.style = "Table Grid"

    # Label every cell "<row>-<col>" (1-based) before merging anything.
    for i in range(3):
        for j in range(3):
            cell = table.cell(i, j)
            cell.text = f"{i + 1}-{j + 1}"

    # Horizontal merge: first row, columns 0 and 1.
    cell_0_0 = table.cell(0, 0)
    cell_0_1 = table.cell(0, 1)
    merged_cell_1 = cell_0_0.merge(cell_0_1)
    # Overwrite the text of the merged cell with a single known label.
    merged_cell_1.text = "1-1 & 1-2"

    # Vertical merge: first column, rows 1 and 2.
    cell_1_0 = table.cell(1, 0)
    cell_2_0 = table.cell(2, 0)
    merged_cell_2 = cell_1_0.merge(cell_2_0)
    merged_cell_2.text = "2-1 & 3-1"

    # Expected parse result per row. The horizontally merged cell is expected
    # once followed by an empty column; the vertically merged cell is expected
    # to repeat in both rows it covers.
    ground_truth = [["1-1 & 1-2", "", "1-3"], ["2-1 & 3-1", "2-2", "2-3"], ["2-1 & 3-1", "3-2", "3-3"]]
    return doc.tables[0], ground_truth
def test_parse_row():
    """Check that every row of the merged-cell table parses to its expected values."""
    table, expected_rows = _generate_table_with_merged_cells()
    # Allocate the extractor without running __init__; only _parse_row is exercised.
    extractor = object.__new__(WordExtractor)
    for row_number, table_row in enumerate(table.rows):
        parsed = extractor._parse_row(table_row, {}, 3)
        assert parsed == expected_rows[row_number]

View File

@ -0,0 +1,46 @@
"""
Utilities for detecting if database service is available for workflow tests.
"""
import psycopg2
import pytest
from configs import dify_config
def is_database_available() -> bool:
    """
    Check if the database service is available by attempting to connect to it.

    Connection parameters are read from ``dify_config`` and the attempt is
    bounded by a 2 second connect timeout.

    Returns:
        True if a connection could be opened (and was closed again), False if
        psycopg2 raised an error while connecting or closing.
    """
    try:
        conn = psycopg2.connect(
            host=dify_config.DB_HOST,
            port=dify_config.DB_PORT,
            database=dify_config.DB_DATABASE,
            user=dify_config.DB_USERNAME,
            password=dify_config.DB_PASSWORD,
            connect_timeout=2,  # 2 second timeout
        )
        # psycopg2's connection context manager only ends the transaction on
        # exit; it does not close the connection. Close it explicitly so the
        # probe does not leak a connection.
        conn.close()
    except psycopg2.Error:
        # psycopg2.OperationalError is a subclass of psycopg2.Error, so this
        # single clause covers connection failures as well.
        return False
    return True
def skip_if_database_unavailable():
    """
    Build a pytest ``skipif`` marker that skips a test when the database is unreachable.

    The availability check runs when this function is called, i.e. when the
    decorator expression is evaluated.

    Usage:
        @skip_if_database_unavailable()
        def test_my_workflow():
            ...
    """
    database_missing = not is_database_available()
    skip_reason = "Database service is not available (connection refused or authentication failed)"
    return pytest.mark.skipif(database_missing, reason=skip_reason)

View File

@ -6,9 +6,11 @@ This module tests the iteration node's ability to:
2. Preserve nested array structure when flatten_output=False
"""
from .test_database_utils import skip_if_database_unavailable
from .test_table_runner import TableTestRunner, WorkflowTestCase
@skip_if_database_unavailable()
def test_iteration_with_flatten_output_enabled():
"""
Test iteration node with flatten_output=True (default behavior).
@ -37,6 +39,7 @@ def test_iteration_with_flatten_output_enabled():
)
@skip_if_database_unavailable()
def test_iteration_with_flatten_output_disabled():
"""
Test iteration node with flatten_output=False.
@ -65,6 +68,7 @@ def test_iteration_with_flatten_output_disabled():
)
@skip_if_database_unavailable()
def test_iteration_flatten_output_comparison():
"""
Run both flatten_output configurations in parallel to verify the difference.