瀏覽代碼

fix(document_extractor): xlsx file column int type error (#21408)

tags/1.5.0
quicksand 4 月之前
父節點
當前提交
45146edb31
沒有連結到貢獻者的電子郵件帳戶。

+ 1
- 1
api/core/workflow/nodes/document_extractor/node.py 查看文件

df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x) # type: ignore df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x) # type: ignore


# Combine multi-line text in column names into a single line # Combine multi-line text in column names into a single line
df.columns = pd.Index([" ".join(col.splitlines()) for col in df.columns])
df.columns = pd.Index([" ".join(str(col).splitlines()) for col in df.columns])


# Manually construct the Markdown table # Manually construct the Markdown table
markdown_table += _construct_markdown_table(df) + "\n\n" markdown_table += _construct_markdown_table(df) + "\n\n"

+ 23
- 0
api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py 查看文件

assert result == "" assert result == ""


assert mock_excel_instance.parse.call_count == 2 assert mock_excel_instance.parse.call_count == 2


@patch("pandas.ExcelFile")
def test_extract_text_from_excel_numeric_type_column(mock_excel_file):
    """Check that a sheet whose column labels are numbers is still extracted.

    The frame under test uses an integer label and a float label rather than
    strings; the extractor is expected to return a Markdown table whose header
    row shows those labels as text.
    """
    # Column labels are deliberately non-string (one int, one float).
    numeric_columns_frame = pd.DataFrame({1: ["Test"], 1.1: ["Test"]})

    # Stand-in for pandas.ExcelFile: a single sheet that parses to the frame
    # built above, so no real workbook bytes are needed.
    fake_workbook = Mock()
    fake_workbook.sheet_names = ["Sheet1"]
    fake_workbook.parse.return_value = numeric_columns_frame
    mock_excel_file.return_value = fake_workbook

    extracted = _extract_text_from_excel(b"fake_excel_content")

    # The expected header renders the integer label as "1.0".
    assert extracted == "| 1.0 | 1.1 |\n| --- | --- |\n| Test | Test |\n\n"

Loading…
取消
儲存