Преглед на файлове

Fix: correct regex for file-preview URL re-signing (#25620)

Fixes #25619

The regex patterns for file-preview and image-preview contained an unescaped `?`, 
which caused incorrect matches such as `file-previe` or `image-previw`. 
This led to malformed URLs being incorrectly re-signed.

Changes:
- Escape `?` in both file-preview and image-preview regex patterns.
- Ensure only valid URLs are re-signed.

Added unit tests to cover:
- Valid file-preview and image-preview URLs (correctly re-signed).
- Misspelled file/image preview URLs (no longer incorrectly matched).

Other:
- Fix a deprecated function `datetime.utcnow()`

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
tags/1.9.0
Yongtao Huang преди 1 месец
родител
ревизия
5bc6e8a433
No account linked to committer's email address

+ 2
- 2
api/models/model.py Целия файл

sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension)
elif "file-preview" in url: elif "file-preview" in url:
# get upload file id # get upload file id
upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview?\?timestamp="
upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview\?timestamp="
result = re.search(upload_file_id_pattern, url) result = re.search(upload_file_id_pattern, url)
if not result: if not result:
continue continue
sign_url = file_helpers.get_signed_file_url(upload_file_id) sign_url = file_helpers.get_signed_file_url(upload_file_id)
elif "image-preview" in url: elif "image-preview" in url:
# image-preview is deprecated, use file-preview instead # image-preview is deprecated, use file-preview instead
upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview?\?timestamp="
upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview\?timestamp="
result = re.search(upload_file_id_pattern, url) result = re.search(upload_file_id_pattern, url)
if not result: if not result:
continue continue

+ 2
- 2
api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py Целия файл

from faker import Faker from faker import Faker


from extensions.ext_database import db from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
from models.model import UploadFile from models.model import UploadFile
UploadFile: Created upload file instance UploadFile: Created upload file instance
""" """
fake = Faker() fake = Faker()
from datetime import datetime


from models.enums import CreatorUserRole from models.enums import CreatorUserRole


mime_type="text/plain", mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT, created_by_role=CreatorUserRole.ACCOUNT,
created_by=account.id, created_by=account.id,
created_at=datetime.utcnow(),
created_at=naive_utc_now(),
used=False, used=False,
) )



+ 5
- 5
api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py Целия файл

"""Unit tests for workflow node execution conflict handling.""" """Unit tests for workflow node execution conflict handling."""


from datetime import datetime
from unittest.mock import MagicMock, Mock from unittest.mock import MagicMock, Mock


import psycopg2.errors import psycopg2.errors
WorkflowNodeExecutionStatus, WorkflowNodeExecutionStatus,
) )
from core.workflow.enums import NodeType from core.workflow.enums import NodeType
from libs.datetime_utils import naive_utc_now
from models import Account, WorkflowNodeExecutionTriggeredFrom from models import Account, WorkflowNodeExecutionTriggeredFrom




title="Test Node", title="Test Node",
index=1, index=1,
status=WorkflowNodeExecutionStatus.RUNNING, status=WorkflowNodeExecutionStatus.RUNNING,
created_at=datetime.utcnow(),
created_at=naive_utc_now(),
) )


original_id = execution.id original_id = execution.id
title="Test Node", title="Test Node",
index=1, index=1,
status=WorkflowNodeExecutionStatus.SUCCEEDED, status=WorkflowNodeExecutionStatus.SUCCEEDED,
created_at=datetime.utcnow(),
created_at=naive_utc_now(),
) )


# Save should update existing record # Save should update existing record
title="Test Node", title="Test Node",
index=1, index=1,
status=WorkflowNodeExecutionStatus.RUNNING, status=WorkflowNodeExecutionStatus.RUNNING,
created_at=datetime.utcnow(),
created_at=naive_utc_now(),
) )


# Save should raise IntegrityError after max retries # Save should raise IntegrityError after max retries
title="Test Node", title="Test Node",
index=1, index=1,
status=WorkflowNodeExecutionStatus.RUNNING, status=WorkflowNodeExecutionStatus.RUNNING,
created_at=datetime.utcnow(),
created_at=naive_utc_now(),
) )


# Save should raise error immediately # Save should raise error immediately

+ 83
- 0
api/tests/unit_tests/models/test_model.py Целия файл

import importlib
import types

import pytest

from models.model import Message


@pytest.fixture(autouse=True)
def patch_file_helpers(monkeypatch: pytest.MonkeyPatch):
"""
Patch file_helpers.get_signed_file_url to a deterministic stub.
"""
model_module = importlib.import_module("models.model")
dummy = types.SimpleNamespace(get_signed_file_url=lambda fid: f"https://signed.example/{fid}")
# Inject/override file_helpers on models.model
monkeypatch.setattr(model_module, "file_helpers", dummy, raising=False)


def _wrap_md(url: str) -> str:
"""
Wrap a raw URL into the markdown that re_sign_file_url_answer expects:
[link](<url>)
"""
return f"please click [file]({url}) to download."


def test_file_preview_valid_replaced():
"""
Valid file-preview URL must be re-signed:
- Extract upload_file_id correctly
- Replace the original URL with the signed URL
"""
upload_id = "abc-123"
url = f"/files/{upload_id}/file-preview?timestamp=111&nonce=222&sign=333"
msg = Message(answer=_wrap_md(url))

out = msg.re_sign_file_url_answer
assert f"https://signed.example/{upload_id}" in out
assert url not in out


def test_file_preview_misspelled_not_replaced():
"""
Misspelled endpoint 'file-previe?timestamp=' should NOT be rewritten.
"""
upload_id = "zzz-001"
# path deliberately misspelled: file-previe? (missing 'w')
# and we append &note=file-preview to trick the old `"file-preview" in url` check.
url = f"/files/{upload_id}/file-previe?timestamp=111&nonce=222&sign=333&note=file-preview"
original = _wrap_md(url)
msg = Message(answer=original)

out = msg.re_sign_file_url_answer
# Expect NO replacement, should not rewrite misspelled file-previe URL
assert out == original


def test_image_preview_valid_replaced():
"""
Valid image-preview URL must be re-signed.
"""
upload_id = "img-789"
url = f"/files/{upload_id}/image-preview?timestamp=123&nonce=456&sign=789"
msg = Message(answer=_wrap_md(url))

out = msg.re_sign_file_url_answer
assert f"https://signed.example/{upload_id}" in out
assert url not in out


def test_image_preview_misspelled_not_replaced():
"""
Misspelled endpoint 'image-previe?timestamp=' should NOT be rewritten.
"""
upload_id = "img-err-42"
url = f"/files/{upload_id}/image-previe?timestamp=1&nonce=2&sign=3&note=image-preview"
original = _wrap_md(url)
msg = Message(answer=original)

out = msg.re_sign_file_url_answer
# Expect NO replacement, should not rewrite misspelled image-previe URL
assert out == original

Loading…
Отказ
Запис