Fixes #25619 The regex patterns for file-preview and image-preview contained an unescaped `?`, which caused incorrect matches such as `file-previe` or `image-previw`. This led to malformed URLs being incorrectly re-signed. Changes: - Escape `?` in both file-preview and image-preview regex patterns. - Ensure only valid URLs are re-signed. Added unit tests to cover: - Valid file-preview and image-preview URLs (correctly re-signed). - Misspelled file/image preview URLs (no longer incorrectly matched). Other: - Fix a deprecated function `datetime.utcnow()` Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>tags/1.9.0
| sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) | sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) | ||||
| elif "file-preview" in url: | elif "file-preview" in url: | ||||
| # get upload file id | # get upload file id | ||||
| upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview?\?timestamp=" | |||||
| upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview\?timestamp=" | |||||
| result = re.search(upload_file_id_pattern, url) | result = re.search(upload_file_id_pattern, url) | ||||
| if not result: | if not result: | ||||
| continue | continue | ||||
| sign_url = file_helpers.get_signed_file_url(upload_file_id) | sign_url = file_helpers.get_signed_file_url(upload_file_id) | ||||
| elif "image-preview" in url: | elif "image-preview" in url: | ||||
| # image-preview is deprecated, use file-preview instead | # image-preview is deprecated, use file-preview instead | ||||
| upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview?\?timestamp=" | |||||
| upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview\?timestamp=" | |||||
| result = re.search(upload_file_id_pattern, url) | result = re.search(upload_file_id_pattern, url) | ||||
| if not result: | if not result: | ||||
| continue | continue |
| from faker import Faker | from faker import Faker | ||||
| from extensions.ext_database import db | from extensions.ext_database import db | ||||
| from libs.datetime_utils import naive_utc_now | |||||
| from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole | from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole | ||||
| from models.dataset import Dataset, Document, DocumentSegment | from models.dataset import Dataset, Document, DocumentSegment | ||||
| from models.model import UploadFile | from models.model import UploadFile | ||||
| UploadFile: Created upload file instance | UploadFile: Created upload file instance | ||||
| """ | """ | ||||
| fake = Faker() | fake = Faker() | ||||
| from datetime import datetime | |||||
| from models.enums import CreatorUserRole | from models.enums import CreatorUserRole | ||||
| mime_type="text/plain", | mime_type="text/plain", | ||||
| created_by_role=CreatorUserRole.ACCOUNT, | created_by_role=CreatorUserRole.ACCOUNT, | ||||
| created_by=account.id, | created_by=account.id, | ||||
| created_at=datetime.utcnow(), | |||||
| created_at=naive_utc_now(), | |||||
| used=False, | used=False, | ||||
| ) | ) | ||||
| """Unit tests for workflow node execution conflict handling.""" | """Unit tests for workflow node execution conflict handling.""" | ||||
| from datetime import datetime | |||||
| from unittest.mock import MagicMock, Mock | from unittest.mock import MagicMock, Mock | ||||
| import psycopg2.errors | import psycopg2.errors | ||||
| WorkflowNodeExecutionStatus, | WorkflowNodeExecutionStatus, | ||||
| ) | ) | ||||
| from core.workflow.enums import NodeType | from core.workflow.enums import NodeType | ||||
| from libs.datetime_utils import naive_utc_now | |||||
| from models import Account, WorkflowNodeExecutionTriggeredFrom | from models import Account, WorkflowNodeExecutionTriggeredFrom | ||||
| title="Test Node", | title="Test Node", | ||||
| index=1, | index=1, | ||||
| status=WorkflowNodeExecutionStatus.RUNNING, | status=WorkflowNodeExecutionStatus.RUNNING, | ||||
| created_at=datetime.utcnow(), | |||||
| created_at=naive_utc_now(), | |||||
| ) | ) | ||||
| original_id = execution.id | original_id = execution.id | ||||
| title="Test Node", | title="Test Node", | ||||
| index=1, | index=1, | ||||
| status=WorkflowNodeExecutionStatus.SUCCEEDED, | status=WorkflowNodeExecutionStatus.SUCCEEDED, | ||||
| created_at=datetime.utcnow(), | |||||
| created_at=naive_utc_now(), | |||||
| ) | ) | ||||
| # Save should update existing record | # Save should update existing record | ||||
| title="Test Node", | title="Test Node", | ||||
| index=1, | index=1, | ||||
| status=WorkflowNodeExecutionStatus.RUNNING, | status=WorkflowNodeExecutionStatus.RUNNING, | ||||
| created_at=datetime.utcnow(), | |||||
| created_at=naive_utc_now(), | |||||
| ) | ) | ||||
| # Save should raise IntegrityError after max retries | # Save should raise IntegrityError after max retries | ||||
| title="Test Node", | title="Test Node", | ||||
| index=1, | index=1, | ||||
| status=WorkflowNodeExecutionStatus.RUNNING, | status=WorkflowNodeExecutionStatus.RUNNING, | ||||
| created_at=datetime.utcnow(), | |||||
| created_at=naive_utc_now(), | |||||
| ) | ) | ||||
| # Save should raise error immediately | # Save should raise error immediately |
| import importlib | |||||
| import types | |||||
| import pytest | |||||
| from models.model import Message | |||||
| @pytest.fixture(autouse=True) | |||||
| def patch_file_helpers(monkeypatch: pytest.MonkeyPatch): | |||||
| """ | |||||
| Patch file_helpers.get_signed_file_url to a deterministic stub. | |||||
| """ | |||||
| model_module = importlib.import_module("models.model") | |||||
| dummy = types.SimpleNamespace(get_signed_file_url=lambda fid: f"https://signed.example/{fid}") | |||||
| # Inject/override file_helpers on models.model | |||||
| monkeypatch.setattr(model_module, "file_helpers", dummy, raising=False) | |||||
| def _wrap_md(url: str) -> str: | |||||
| """ | |||||
| Wrap a raw URL into the markdown that re_sign_file_url_answer expects: | |||||
| [link](<url>) | |||||
| """ | |||||
| return f"please click [file]({url}) to download." | |||||
| def test_file_preview_valid_replaced(): | |||||
| """ | |||||
| Valid file-preview URL must be re-signed: | |||||
| - Extract upload_file_id correctly | |||||
| - Replace the original URL with the signed URL | |||||
| """ | |||||
| upload_id = "abc-123" | |||||
| url = f"/files/{upload_id}/file-preview?timestamp=111&nonce=222&sign=333" | |||||
| msg = Message(answer=_wrap_md(url)) | |||||
| out = msg.re_sign_file_url_answer | |||||
| assert f"https://signed.example/{upload_id}" in out | |||||
| assert url not in out | |||||
| def test_file_preview_misspelled_not_replaced(): | |||||
| """ | |||||
| Misspelled endpoint 'file-previe?timestamp=' should NOT be rewritten. | |||||
| """ | |||||
| upload_id = "zzz-001" | |||||
| # path deliberately misspelled: file-previe? (missing 'w') | |||||
| # and we append ¬e=file-preview to trick the old `"file-preview" in url` check. | |||||
| url = f"/files/{upload_id}/file-previe?timestamp=111&nonce=222&sign=333¬e=file-preview" | |||||
| original = _wrap_md(url) | |||||
| msg = Message(answer=original) | |||||
| out = msg.re_sign_file_url_answer | |||||
| # Expect NO replacement, should not rewrite misspelled file-previe URL | |||||
| assert out == original | |||||
| def test_image_preview_valid_replaced(): | |||||
| """ | |||||
| Valid image-preview URL must be re-signed. | |||||
| """ | |||||
| upload_id = "img-789" | |||||
| url = f"/files/{upload_id}/image-preview?timestamp=123&nonce=456&sign=789" | |||||
| msg = Message(answer=_wrap_md(url)) | |||||
| out = msg.re_sign_file_url_answer | |||||
| assert f"https://signed.example/{upload_id}" in out | |||||
| assert url not in out | |||||
| def test_image_preview_misspelled_not_replaced(): | |||||
| """ | |||||
| Misspelled endpoint 'image-previe?timestamp=' should NOT be rewritten. | |||||
| """ | |||||
| upload_id = "img-err-42" | |||||
| url = f"/files/{upload_id}/image-previe?timestamp=1&nonce=2&sign=3¬e=image-preview" | |||||
| original = _wrap_md(url) | |||||
| msg = Message(answer=original) | |||||
| out = msg.re_sign_file_url_answer | |||||
| # Expect NO replacement, should not rewrite misspelled image-previe URL | |||||
| assert out == original |