result = conn.execute(sa.text(variables_query))
orphaned_by_app = {}
total_files = 0
for row in result:
    app_id, variable_count, file_count = row
    orphaned_by_app[app_id] = {"variables": variable_count, "files": file_count}
    total_files += file_count
total_orphaned = sum(app_data["variables"] for app_data in orphaned_by_app.values())
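# Illustrative shape of the aggregation above (hypothetical values, two apps):
#   orphaned_by_app = {
#       "app-1": {"variables": 12, "files": 3},
#       "app-2": {"variables": 4, "files": 0},
#   }
#   total_files == 3; total_orphaned == 16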
from core.tools.tool_manager import ToolManager
from core.variables.segments import ArrayFileSegment, FileSegment, Segment
from core.workflow.entities import WorkflowExecution, WorkflowNodeExecution
from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
from libs.datetime_utils import naive_utc_now
from models import (
from collections.abc import Mapping, Sequence
from enum import StrEnum
from typing import TYPE_CHECKING, Any, Optional

from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator

if TYPE_CHECKING:
    from core.ops.ops_trace_manager import TraceQueueManager

from constants import UUID_NIL
from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAppConfig
from core.entities.provider_configuration import ProviderModelBundle

    extras: dict[str, Any] = Field(default_factory=dict)
    # tracing instance
    trace_manager: Optional["TraceQueueManager"] = None


class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
    inputs: dict

    single_loop_run: Optional[SingleLoopRunEntity] = None


# Import TraceQueueManager at runtime to resolve forward references
from core.ops.ops_trace_manager import TraceQueueManager

# Rebuild models that use forward references
AppGenerateEntity.model_rebuild()
EasyUIBasedAppGenerateEntity.model_rebuild()
ConversationAppGenerateEntity.model_rebuild()
ChatAppGenerateEntity.model_rebuild()
CompletionAppGenerateEntity.model_rebuild()
AgentChatAppGenerateEntity.model_rebuild()
AdvancedChatAppGenerateEntity.model_rebuild()
WorkflowAppGenerateEntity.model_rebuild()
RagPipelineGenerateEntity.model_rebuild()
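# A minimal, self-contained sketch (not this module's code) of the pattern used
# above: the import under TYPE_CHECKING keeps the annotation a plain string at
# class-definition time, and model_rebuild() resolves it once the real class has
# been imported at runtime. Module and class names below are illustrative, and a
# non-Pydantic referenced type also needs arbitrary_types_allowed.
#
#     from typing import TYPE_CHECKING, Optional
#     from pydantic import BaseModel, ConfigDict
#
#     if TYPE_CHECKING:
#         from my_pkg.tracing import TraceQueueManager  # hypothetical module
#
#     class Entity(BaseModel):
#         model_config = ConfigDict(arbitrary_types_allowed=True)
#         trace_manager: Optional["TraceQueueManager"] = None
#
#     from my_pkg.tracing import TraceQueueManager  # runtime import
#     Entity.model_rebuild()  # forward reference now resolvable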
from typing import TYPE_CHECKING, Any, Optional

from openai import BaseModel
from pydantic import Field

from core.datasource.entities.datasource_entities import DatasourceInvokeFrom

if TYPE_CHECKING:
    from core.app.entities.app_invoke_entities import InvokeFrom


class DatasourceRuntime(BaseModel):
    """
    tenant_id: str
    datasource_id: Optional[str] = None
    invoke_from: Optional["InvokeFrom"] = None
    datasource_invoke_from: Optional[DatasourceInvokeFrom] = None
    credentials: dict[str, Any] = Field(default_factory=dict)
    runtime_parameters: dict[str, Any] = Field(default_factory=dict)
    """

    def __init__(self):
        # Import InvokeFrom locally to avoid circular import
        from core.app.entities.app_invoke_entities import InvokeFrom

        super().__init__(
            tenant_id="fake_tenant_id",
            datasource_id="fake_datasource_id",
            if self.related_id is None:
                raise ValueError("Missing file related_id")
            return helpers.get_signed_file_url(upload_file_id=self.related_id)
        elif self.transfer_method in [FileTransferMethod.TOOL_FILE, FileTransferMethod.DATASOURCE_FILE]:
            assert self.related_id is not None
            assert self.extension is not None
            return sign_tool_file(tool_file_id=self.related_id, extension=self.extension)

    def to_plugin_parameter(self) -> dict[str, Any]:
        return {
            "dify_model_identity": FILE_MODEL_IDENTITY,
            preview = []
            for content in chunks:
                preview.append({"content": content})
            return {"chunk_structure": IndexType.PARAGRAPH_INDEX, "preview": preview, "total_segments": len(chunks)}
        else:
            raise ValueError("Chunks is not a list")
        vector.create(all_child_documents)

    def format_preview(self, chunks: Any) -> Mapping[str, Any]:
        parent_childs = ParentChildStructureChunk(**chunks)
        preview = []
        for parent_child in parent_childs.parent_child_chunks:
| """Returns all schemas for a version in the API format""" | """Returns all schemas for a version in the API format""" | ||||
| version_schemas = self.versions.get(version, {}) | version_schemas = self.versions.get(version, {}) | ||||
| result = [] | |||||
| result: list[Mapping[str, Any]] = [] | |||||
| for schema_name, schema in version_schemas.items(): | for schema_name, schema in version_schemas.items(): | ||||
| result.append({"name": schema_name, "label": schema.get("title", schema_name), "schema": schema}) | result.append({"name": schema_name, "label": schema.get("title", schema_name), "schema": schema}) | ||||
| from core.workflow.nodes.datasource.datasource_node import DatasourceNode | from core.workflow.nodes.datasource.datasource_node import DatasourceNode | ||||
| if isinstance(self, DatasourceNode): | if isinstance(self, DatasourceNode): | ||||
| start_event.provider_id = f"{getattr(self.get_base_node_data(), 'plugin_id', '')}/{getattr(self.get_base_node_data(), 'provider_name', '')}" | |||||
| plugin_id = getattr(self.get_base_node_data(), "plugin_id", "") | |||||
| provider_name = getattr(self.get_base_node_data(), "provider_name", "") | |||||
| start_event.provider_id = f"{plugin_id}/{provider_name}" | |||||
| start_event.provider_type = getattr(self.get_base_node_data(), "provider_type", "") | start_event.provider_type = getattr(self.get_base_node_data(), "provider_type", "") | ||||
| from typing import cast | from typing import cast |
| """datasource_oauth_refresh | """datasource_oauth_refresh | ||||
| Revision ID: 17d4db47800c | Revision ID: 17d4db47800c | ||||
| Revises: 223c3f882c69 | |||||
| Revises: 74e5f667f4b7 | |||||
| Create Date: 2025-08-11 11:38:03.662874 | Create Date: 2025-08-11 11:38:03.662874 | ||||
| """ | """ | ||||
| # revision identifiers, used by Alembic. | # revision identifiers, used by Alembic. | ||||
| revision = '17d4db47800c' | revision = '17d4db47800c' | ||||
| down_revision = '223c3f882c69' | |||||
| down_revision = '74e5f667f4b7' | |||||
| branch_labels = None | branch_labels = None | ||||
| depends_on = None | depends_on = None | ||||
| # ### commands auto generated by Alembic - please adjust! ### | # ### commands auto generated by Alembic - please adjust! ### | ||||
| with op.batch_alter_table('datasource_providers', schema=None) as batch_op: | with op.batch_alter_table('datasource_providers', schema=None) as batch_op: | ||||
| batch_op.add_column(sa.Column('expires_at', sa.Integer(), nullable=False, server_default='-1')) | batch_op.add_column(sa.Column('expires_at', sa.Integer(), nullable=False, server_default='-1')) | ||||
| # ### end Alembic commands ### | # ### end Alembic commands ### | ||||
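# A plausible downgrade counterpart (not shown in this hunk); assuming the usual
# Alembic pattern, it would simply drop the column added above:
#
#     def downgrade():
#         with op.batch_alter_table('datasource_providers', schema=None) as batch_op:
#             batch_op.drop_column('expires_at')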
            credentials = self.list_datasource_credentials(
                tenant_id=tenant_id, provider=datasource.provider, plugin_id=datasource.plugin_id
            )
            redirect_uri = "{}/console/api/oauth/plugin/{}/datasource/callback".format(
                dify_config.CONSOLE_API_URL, datasource_provider_id
            )
            datasource_credentials.append(
                {
                    "provider": datasource.provider,
import dataclasses
from collections.abc import Mapping
from enum import StrEnum
from typing import Any, Generic, TypeAlias, TypedDict, TypeVar, overload

from configs import dify_config
from core.file.models import File
from core.variables.segments import (
    ArrayFileSegment,
    ArraySegment,
    Segment,
    StringSegment,
)
from core.variables.utils import dumps_with_segments

_MAX_DEPTH = 20
class _QAKeys:
    """dict keys for _QAStructure"""

    QA_CHUNKS = "qa_chunks"
    QUESTION = "question"
    ANSWER = "answer"


class _PCKeys:
    """dict keys for _ParentChildStructure"""

    PARENT_MODE = "parent_mode"
    PARENT_CHILD_CHUNKS = "parent_child_chunks"
    PARENT_CONTENT = "parent_content"
    CHILD_CONTENTS = "child_contents"


class _QAStructureItem(TypedDict):
    question: str
    answer: str


class _QAStructure(TypedDict):
    qa_chunks: list[_QAStructureItem]


class _ParentChildChunkItem(TypedDict):
    parent_content: str
    child_contents: list[str]


class _ParentChildStructure(TypedDict):
    parent_mode: str
    parent_child_chunks: list[_ParentChildChunkItem]


class _SpecialChunkType(StrEnum):
    parent_child = "parent_child"
    qa = "qa"


_T = TypeVar("_T")


@dataclasses.dataclass(frozen=True)
class _PartResult(Generic[_T]):
    value: _T
    value_size: int
    truncated: bool
class MaxDepthExceededError(Exception):

    Uses recursive size calculation to avoid repeated JSON serialization.
    """

    def __init__(
        self,
        string_length_limit=5000,
        array_element_limit: int = 20,
        max_size_bytes: int = 1024_000,  # 100KB
    ):
        if string_length_limit <= 3:
            raise ValueError("string_length_limit should be greater than 3.")
        of a WorkflowNodeExecution record. This ensures the mappings remain within the
        specified size limits while preserving their structure.
        """
        size = self.calculate_json_size(v)
        if size < self._max_size_bytes:
            return v, False
        budget = self._max_size_bytes
        is_truncated = False
        truncated_mapping: dict[str, Any] = {}
        length = len(v.items())
        used_size = 0
        for key, value in v.items():
            used_size += self.calculate_json_size(key)
            if used_size > budget:
                truncated_mapping[key] = "..."
                continue
            value_budget = (budget - used_size) // (length - len(truncated_mapping))
            if isinstance(value, Segment):
                part_result = self._truncate_segment(value, value_budget)
            else:
                part_result = self._truncate_json_primitives(value, value_budget)
            is_truncated = is_truncated or part_result.truncated
            truncated_mapping[key] = part_result.value
            used_size += part_result.value_size
        return truncated_mapping, is_truncated
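    # Illustrative budget arithmetic for the loop above (hypothetical numbers):
    # with budget=100 and v={"a": <long string>, "b": <short string>}, key "a"
    # costs 3 ('"a"'), so its value gets (100 - 3) // 2 = 48; whatever that pair
    # actually consumes is added to used_size before "b"'s budget is computed,
    # so later keys only share the remaining budget.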
    @staticmethod

        return True

    def truncate(self, segment: Segment) -> TruncationResult:
        if isinstance(segment, StringSegment):
            result = self._truncate_segment(segment, self._string_length_limit)
        else:
            result = self._truncate_segment(segment, self._max_size_bytes)
        if result.value_size > self._max_size_bytes:
            if isinstance(result.value, str):
                result = self._truncate_string(result.value, self._max_size_bytes)
                return TruncationResult(StringSegment(value=result.value), True)
            # Apply final fallback - convert to JSON string and truncate
            json_str = dumps_with_segments(result.value, ensure_ascii=False)
            if len(json_str) > self._max_size_bytes:
                json_str = json_str[: self._max_size_bytes] + "..."
            return TruncationResult(result=StringSegment(value=json_str), truncated=True)
        return TruncationResult(
            result=segment.model_copy(update={"value": result.value.value}), truncated=result.truncated
        )
    def _truncate_segment(self, segment: Segment, target_size: int) -> _PartResult[Segment]:
        """
        Apply smart truncation to a variable value.
        """
        if not VariableTruncator._segment_need_truncation(segment):
            return _PartResult(segment, self.calculate_json_size(segment.value), False)

        result: _PartResult[Any]
        # Apply type-specific truncation with target size
        if isinstance(segment, ArraySegment):
            result = self._truncate_array(segment.value, target_size)
        elif isinstance(segment, StringSegment):
            result = self._truncate_string(segment.value, target_size)
        elif isinstance(segment, ObjectSegment):
            result = self._truncate_object(segment.value, target_size)
        else:
            raise AssertionError("this should be unreachable.")
        return _PartResult(
            value=segment.model_copy(update={"value": result.value}),
            value_size=result.value_size,
            truncated=result.truncated,
        )
    @staticmethod
    def calculate_json_size(value: Any, depth=0) -> int:
        """Recursively calculate JSON size without serialization."""
        if isinstance(value, Segment):
            return VariableTruncator.calculate_json_size(value.value)
        if depth > _MAX_DEPTH:
            raise MaxDepthExceededError()
        if isinstance(value, str):
            # Ideally, the size of strings should be calculated based on their utf-8 encoded length.
            # However, this adds complexity as we would need to compute encoded sizes consistently
            # throughout the code. Therefore, we approximate the size using the string's length.
            # Rough estimate: number of characters, plus 2 for quotes
            return len(value) + 2
        elif isinstance(value, (int, float)):
            return len(str(value))
        elif isinstance(value, bool):

                total += 1  # ":"
                total += VariableTruncator.calculate_json_size(value[key], depth=depth + 1)
            return total
        elif isinstance(value, File):
            return VariableTruncator.calculate_json_size(value.model_dump(), depth=depth + 1)
        else:
            raise UnknownTypeError(f"got unknown type {type(value)}")
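    # Rough worked example of the estimate above, assuming the dict branch adds
    # 2 for braces and 1 per comma (mirroring compact JSON):
    #   calculate_json_size({"a": 1}) -> 2 ("{}") + 3 ('"a"') + 1 (":") + 1 ("1") = 7,
    # which equals len('{"a":1}').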
    def _truncate_string(self, value: str, target_size: int) -> _PartResult[str]:
        if (size := self.calculate_json_size(value)) < target_size:
            return _PartResult(value, size, False)
        if target_size < 5:
            return _PartResult("...", 5, True)
        truncated_size = min(self._string_length_limit, target_size - 5)
        truncated_value = value[:truncated_size] + "..."
        return _PartResult(truncated_value, self.calculate_json_size(truncated_value), True)
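    # Example of the sizing above: with the default string_length_limit (5000)
    # and target_size=20, a 100-character string keeps min(5000, 20 - 5) = 15
    # characters and gains a trailing "...".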
    def _truncate_array(self, value: list, target_size: int) -> _PartResult[list]:
        """
        Truncate array with correct strategy:
        1. First limit to 20 items
        2. If still too large, truncate individual items
        """
        truncated_value: list[Any] = []
        truncated = False
        used_size = self.calculate_json_size([])
        target_length = self._array_element_limit
        for i, item in enumerate(value):
            if i >= target_length:
                return _PartResult(truncated_value, used_size, True)
            if i > 0:
                used_size += 1  # Account for comma
            if used_size > target_size:
                break
            part_result = self._truncate_json_primitives(item, target_size - used_size)
            truncated_value.append(part_result.value)
            used_size += part_result.value_size
            truncated = part_result.truncated
        return _PartResult(truncated_value, used_size, truncated)
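    # Example of the strategy above: with array_element_limit=20 and a 50-item
    # list, items from index 20 on are dropped outright (truncated=True), while
    # earlier items are individually truncated against the unused part of
    # target_size.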
    @classmethod
    def _maybe_qa_structure(cls, m: Mapping[str, Any]) -> bool:
        qa_chunks = m.get(_QAKeys.QA_CHUNKS)
        if qa_chunks is None:
            return False
        if not isinstance(qa_chunks, list):
            return False
        return True

    @classmethod
    def _maybe_parent_child_structure(cls, m: Mapping[str, Any]) -> bool:
        parent_mode = m.get(_PCKeys.PARENT_MODE)
        if parent_mode is None:
            return False
        if not isinstance(parent_mode, str):
            return False
        parent_child_chunks = m.get(_PCKeys.PARENT_CHILD_CHUNKS)
        if parent_child_chunks is None:
            return False
        if not isinstance(parent_child_chunks, list):
            return False
        return True
    def _truncate_object(self, mapping: Mapping[str, Any], target_size: int) -> _PartResult[Mapping[str, Any]]:
        """
        Truncate object with key preservation priority.
        1. Keep all keys, truncate values to fit within budget
        2. If still too large, drop keys starting from the end
        """
        if not mapping:
            return _PartResult(mapping, self.calculate_json_size(mapping), False)

        truncated_obj = {}
        truncated = False
        used_size = self.calculate_json_size({})

        # Sort keys to ensure deterministic behavior
        sorted_keys = sorted(mapping.keys())

        for i, key in enumerate(sorted_keys):
            if used_size > target_size:
                # No more room for additional key-value pairs
                truncated = True
                break

            pair_size = 0
            if i > 0:
                pair_size += 1  # Account for comma

            # Calculate budget for this key-value pair
            # do not try to truncate keys, as we want to keep the structure of
            # object.
            key_size = self.calculate_json_size(key) + 1  # +1 for ":"
            pair_size += key_size
            remaining_pairs = len(sorted_keys) - i
            value_budget = max(0, (target_size - pair_size - used_size) // remaining_pairs)
            if value_budget <= 0:
                truncated = True
                break

            # Truncate the value to fit within budget
            value = mapping[key]
            if isinstance(value, Segment):
                value_result = self._truncate_segment(value, value_budget)
            else:
                value_result = self._truncate_json_primitives(mapping[key], value_budget)
            truncated_obj[key] = value_result.value
            pair_size += value_result.value_size
            used_size += pair_size
            if value_result.truncated:
                truncated = True
        return _PartResult(truncated_obj, used_size, truncated)
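    # Example of the key-budget split above: for target_size=100 and keys
    # ["a", "b"], the value budget for "a" is roughly (100 - 4) // 2 = 48, and
    # "b" then divides only what the first pair left unused; keys themselves are
    # never shortened, and once the budget is exhausted the remaining keys are
    # dropped entirely.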
    @overload
    def _truncate_json_primitives(self, val: str, target_size: int) -> _PartResult[str]: ...
    @overload
    def _truncate_json_primitives(self, val: list, target_size: int) -> _PartResult[list]: ...
    @overload
    def _truncate_json_primitives(self, val: dict, target_size: int) -> _PartResult[dict]: ...
    @overload
    def _truncate_json_primitives(self, val: bool, target_size: int) -> _PartResult[bool]: ...
    @overload
    def _truncate_json_primitives(self, val: int, target_size: int) -> _PartResult[int]: ...
    @overload
    def _truncate_json_primitives(self, val: float, target_size: int) -> _PartResult[float]: ...
    @overload
    def _truncate_json_primitives(self, val: None, target_size: int) -> _PartResult[None]: ...

    def _truncate_json_primitives(
        self, val: str | list | dict | bool | int | float | None, target_size: int
    ) -> _PartResult[Any]:
        """Truncate a value within an object to fit within budget."""
        if isinstance(val, str):
            return self._truncate_string(val, target_size)
        elif isinstance(val, list):
            return self._truncate_array(val, target_size)
        elif isinstance(val, dict):
            return self._truncate_object(val, target_size)
        elif val is None or isinstance(val, (bool, int, float)):
            return _PartResult(val, self.calculate_json_size(val), False)
        else:
            raise AssertionError("this statement should be unreachable.")
    FileSegment,
)
from core.variables.types import SegmentType
from core.variables.utils import dumps_with_segments
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from core.workflow.enums import SystemVariableKey
from core.workflow.nodes import NodeType

                filename = f"{self._generate_filename(name)}.txt"
            else:
                # For other types, store as JSON
                original_content_serialized = dumps_with_segments(value_seg.value, ensure_ascii=False)
                content_type = "application/json"
                filename = f"{self._generate_filename(name)}.json"

                upload_file_ids.append(upload_file_id)
                files_deleted += 1
            except Exception as e:
                logging.exception("Failed to delete storage object %s", storage_key)
                # Continue with database cleanup even if storage deletion fails
                upload_file_ids.append(upload_file_id)

        """
        conn.execute(sa.text(delete_variable_files_sql), {"file_ids": tuple(file_ids)})
    except Exception:
        logging.exception("Error deleting draft variable offload data:")
        # Don't raise, as we want to continue with the main deletion process

    return files_deleted
import uuid

import pytest
from sqlalchemy.orm import Session, joinedload, selectinload

from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from libs.uuid_utils import uuidv7
from models.enums import CreatorUserRole
from models.model import UploadFile
from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload, WorkflowNodeExecutionTriggeredFrom


@pytest.fixture
def session(flask_req_ctx):
    with Session(bind=db.engine, expire_on_commit=False) as session:
        yield session


def test_offload(session, setup_account):
    tenant_id = str(uuid.uuid4())
    app_id = str(uuid.uuid4())

    # step 1: create a UploadFile
    input_upload_file = UploadFile(
        tenant_id=tenant_id,
        storage_type="local",
        key="fake_storage_key",
        name="test_file.txt",
        size=1024,
        extension="txt",
        mime_type="text/plain",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=setup_account.id,
        created_at=naive_utc_now(),
        used=False,
    )
    output_upload_file = UploadFile(
        tenant_id=tenant_id,
        storage_type="local",
        key="fake_storage_key",
        name="test_file.txt",
        size=1024,
        extension="txt",
        mime_type="text/plain",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=setup_account.id,
        created_at=naive_utc_now(),
        used=False,
    )
    session.add(input_upload_file)
    session.add(output_upload_file)
    session.flush()

    # step 2: create a WorkflowNodeExecutionModel
    node_execution = WorkflowNodeExecutionModel(
        id=str(uuid.uuid4()),
        tenant_id=tenant_id,
        app_id=app_id,
        workflow_id=str(uuid.uuid4()),
        triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
        index=1,
        node_id="test_node_id",
        node_type="test",
        title="Test Node",
        status="succeeded",
        created_by_role=CreatorUserRole.ACCOUNT.value,
        created_by=setup_account.id,
    )
    session.add(node_execution)
    session.flush()

    # step 3: create a WorkflowNodeExecutionOffload
    offload = WorkflowNodeExecutionOffload(
        id=uuidv7(),
        tenant_id=tenant_id,
        app_id=app_id,
        node_execution_id=node_execution.id,
        inputs_file_id=input_upload_file.id,
        outputs_file_id=output_upload_file.id,
    )
    session.add(offload)
    session.flush()

    # Test preloading - this should work without raising LazyLoadError
    result = (
        session.query(WorkflowNodeExecutionModel)
        .options(
            selectinload(WorkflowNodeExecutionModel.offload_data).options(
                joinedload(
                    WorkflowNodeExecutionOffload.inputs_file,
                ),
                joinedload(
                    WorkflowNodeExecutionOffload.outputs_file,
                ),
            )
        )
        .filter(WorkflowNodeExecutionModel.id == node_execution.id)
        .first()
    )

    # Verify the relationships are properly loaded
    assert result is not None
    assert result.offload_data is not None
    assert result.offload_data.inputs_file is not None
    assert result.offload_data.inputs_file.id == input_upload_file.id
    assert result.offload_data.inputs_file.name == "test_file.txt"

    # Test the computed properties
    assert result.inputs_truncated is True
    assert result.outputs_truncated is False
    assert False
def _test_offload_save(session, setup_account):
    tenant_id = str(uuid.uuid4())
    app_id = str(uuid.uuid4())

    # step 1: create a UploadFile
    input_upload_file = UploadFile(
        tenant_id=tenant_id,
        storage_type="local",
        key="fake_storage_key",
        name="test_file.txt",
        size=1024,
        extension="txt",
        mime_type="text/plain",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=setup_account.id,
        created_at=naive_utc_now(),
        used=False,
    )
    output_upload_file = UploadFile(
        tenant_id=tenant_id,
        storage_type="local",
        key="fake_storage_key",
        name="test_file.txt",
        size=1024,
        extension="txt",
        mime_type="text/plain",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=setup_account.id,
        created_at=naive_utc_now(),
        used=False,
    )
    node_execution_id = id = str(uuid.uuid4())

    # step 3: create a WorkflowNodeExecutionOffload
    offload = WorkflowNodeExecutionOffload(
        id=uuidv7(),
        tenant_id=tenant_id,
        app_id=app_id,
        node_execution_id=node_execution_id,
    )
    offload.inputs_file = input_upload_file
    offload.outputs_file = output_upload_file

    # step 2: create a WorkflowNodeExecutionModel
    node_execution = WorkflowNodeExecutionModel(
        id=str(uuid.uuid4()),
        tenant_id=tenant_id,
        app_id=app_id,
        workflow_id=str(uuid.uuid4()),
        triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
        index=1,
        node_id="test_node_id",
        node_type="test",
        title="Test Node",
        status="succeeded",
        created_by_role=CreatorUserRole.ACCOUNT.value,
        created_by=setup_account.id,
    )
    node_execution.offload_data = offload
    session.add(node_execution)
    session.flush()
    assert False
| """ | |||||
| 2025-08-21 15:34:49,570 INFO sqlalchemy.engine.Engine BEGIN (implicit) | |||||
| 2025-08-21 15:34:49,572 INFO sqlalchemy.engine.Engine INSERT INTO upload_files (id, tenant_id, storage_type, key, name, size, extension, mime_type, created_by_role, created_by, created_at, used, used_by, used_at, hash, source_url) VALUES (%(id__0)s::UUID, %(tenant_id__0)s::UUID, %(storage_type__0)s, %(k ... 410 characters truncated ... (created_at__1)s, %(used__1)s, %(used_by__1)s::UUID, %(used_at__1)s, %(hash__1)s, %(source_url__1)s) | |||||
| 2025-08-21 15:34:49,572 INFO sqlalchemy.engine.Engine [generated in 0.00009s (insertmanyvalues) 1/1 (unordered)] {'created_at__0': datetime.datetime(2025, 8, 21, 15, 34, 49, 570482), 'id__0': '366621fa-4326-403e-8709-62e4d0de7367', 'storage_type__0': 'local', 'extension__0': 'txt', 'created_by__0': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'used_at__0': None, 'used_by__0': None, 'source_url__0': '', 'mime_type__0': 'text/plain', 'created_by_role__0': 'account', 'used__0': False, 'size__0': 1024, 'tenant_id__0': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'hash__0': None, 'key__0': 'fake_storage_key', 'name__0': 'test_file.txt', 'created_at__1': datetime.datetime(2025, 8, 21, 15, 34, 49, 570563), 'id__1': '3cdec641-a452-4df0-a9af-4a1a30c27ea5', 'storage_type__1': 'local', 'extension__1': 'txt', 'created_by__1': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'used_at__1': None, 'used_by__1': None, 'source_url__1': '', 'mime_type__1': 'text/plain', 'created_by_role__1': 'account', 'used__1': False, 'size__1': 1024, 'tenant_id__1': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'hash__1': None, 'key__1': 'fake_storage_key', 'name__1': 'test_file.txt'} | |||||
| 2025-08-21 15:34:49,576 INFO sqlalchemy.engine.Engine INSERT INTO workflow_node_executions (id, tenant_id, app_id, workflow_id, triggered_from, workflow_run_id, index, predecessor_node_id, node_execution_id, node_id, node_type, title, inputs, process_data, outputs, status, error, execution_metadata, created_by_role, created_by, finished_at) VALUES (%(id)s::UUID, %(tenant_id)s::UUID, %(app_id)s::UUID, %(workflow_id)s::UUID, %(triggered_from)s, %(workflow_run_id)s::UUID, %(index)s, %(predecessor_node_id)s, %(node_execution_id)s, %(node_id)s, %(node_type)s, %(title)s, %(inputs)s, %(process_data)s, %(outputs)s, %(status)s, %(error)s, %(execution_metadata)s, %(created_by_role)s, %(created_by)s::UUID, %(finished_at)s) RETURNING workflow_node_executions.elapsed_time, workflow_node_executions.created_at | |||||
| 2025-08-21 15:34:49,576 INFO sqlalchemy.engine.Engine [generated in 0.00019s] {'id': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'tenant_id': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'app_id': '79fa81c7-2760-40db-af54-74cb2fea2ce7', 'workflow_id': '95d341e3-381c-4c54-a383-f685a9741053', 'triggered_from': <WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN: 'workflow-run'>, 'workflow_run_id': None, 'index': 1, 'predecessor_node_id': None, 'node_execution_id': None, 'node_id': 'test_node_id', 'node_type': 'test', 'title': 'Test Node', 'inputs': None, 'process_data': None, 'outputs': None, 'status': 'succeeded', 'error': None, 'execution_metadata': None, 'created_by_role': 'account', 'created_by': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'finished_at': None} | |||||
| 2025-08-21 15:34:49,579 INFO sqlalchemy.engine.Engine INSERT INTO workflow_node_execution_offload (id, created_at, tenant_id, app_id, node_execution_id, inputs_file_id, outputs_file_id) VALUES (%(id)s::UUID, %(created_at)s, %(tenant_id)s::UUID, %(app_id)s::UUID, %(node_execution_id)s::UUID, %(inputs_file_id)s::UUID, %(outputs_file_id)s::UUID) | |||||
| 2025-08-21 15:34:49,579 INFO sqlalchemy.engine.Engine [generated in 0.00016s] {'id': '0198cd44-b7ea-724b-9e1b-5f062a2ef45b', 'created_at': datetime.datetime(2025, 8, 21, 15, 34, 49, 579072), 'tenant_id': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'app_id': '79fa81c7-2760-40db-af54-74cb2fea2ce7', 'node_execution_id': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'inputs_file_id': '366621fa-4326-403e-8709-62e4d0de7367', 'outputs_file_id': '3cdec641-a452-4df0-a9af-4a1a30c27ea5'} | |||||
| 2025-08-21 15:34:49,581 INFO sqlalchemy.engine.Engine SELECT workflow_node_executions.id AS workflow_node_executions_id, workflow_node_executions.tenant_id AS workflow_node_executions_tenant_id, workflow_node_executions.app_id AS workflow_node_executions_app_id, workflow_node_executions.workflow_id AS workflow_node_executions_workflow_id, workflow_node_executions.triggered_from AS workflow_node_executions_triggered_from, workflow_node_executions.workflow_run_id AS workflow_node_executions_workflow_run_id, workflow_node_executions.index AS workflow_node_executions_index, workflow_node_executions.predecessor_node_id AS workflow_node_executions_predecessor_node_id, workflow_node_executions.node_execution_id AS workflow_node_executions_node_execution_id, workflow_node_executions.node_id AS workflow_node_executions_node_id, workflow_node_executions.node_type AS workflow_node_executions_node_type, workflow_node_executions.title AS workflow_node_executions_title, workflow_node_executions.inputs AS workflow_node_executions_inputs, workflow_node_executions.process_data AS workflow_node_executions_process_data, workflow_node_executions.outputs AS workflow_node_executions_outputs, workflow_node_executions.status AS workflow_node_executions_status, workflow_node_executions.error AS workflow_node_executions_error, workflow_node_executions.elapsed_time AS workflow_node_executions_elapsed_time, workflow_node_executions.execution_metadata AS workflow_node_executions_execution_metadata, workflow_node_executions.created_at AS workflow_node_executions_created_at, workflow_node_executions.created_by_role AS workflow_node_executions_created_by_role, workflow_node_executions.created_by AS workflow_node_executions_created_by, workflow_node_executions.finished_at AS workflow_node_executions_finished_at | |||||
| FROM workflow_node_executions | |||||
| WHERE workflow_node_executions.id = %(id_1)s::UUID | |||||
| LIMIT %(param_1)s | |||||
| 2025-08-21 15:34:49,581 INFO sqlalchemy.engine.Engine [generated in 0.00009s] {'id_1': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'param_1': 1} | |||||
| 2025-08-21 15:34:49,585 INFO sqlalchemy.engine.Engine SELECT workflow_node_execution_offload.node_execution_id AS workflow_node_execution_offload_node_execution_id, workflow_node_execution_offload.id AS workflow_node_execution_offload_id, workflow_node_execution_offload.created_at AS workflow_node_execution_offload_created_at, workflow_node_execution_offload.tenant_id AS workflow_node_execution_offload_tenant_id, workflow_node_execution_offload.app_id AS workflow_node_execution_offload_app_id, workflow_node_execution_offload.inputs_file_id AS workflow_node_execution_offload_inputs_file_id, workflow_node_execution_offload.outputs_file_id AS workflow_node_execution_offload_outputs_file_id | |||||
| FROM workflow_node_execution_offload | |||||
| WHERE workflow_node_execution_offload.node_execution_id IN (%(primary_keys_1)s::UUID) | |||||
| 2025-08-21 15:34:49,585 INFO sqlalchemy.engine.Engine [generated in 0.00021s] {'primary_keys_1': '9aac28b6-b6fc-4aea-abdf-21da3227e621'} | |||||
| 2025-08-21 15:34:49,587 INFO sqlalchemy.engine.Engine SELECT upload_files.id AS upload_files_id, upload_files.tenant_id AS upload_files_tenant_id, upload_files.storage_type AS upload_files_storage_type, upload_files.key AS upload_files_key, upload_files.name AS upload_files_name, upload_files.size AS upload_files_size, upload_files.extension AS upload_files_extension, upload_files.mime_type AS upload_files_mime_type, upload_files.created_by_role AS upload_files_created_by_role, upload_files.created_by AS upload_files_created_by, upload_files.created_at AS upload_files_created_at, upload_files.used AS upload_files_used, upload_files.used_by AS upload_files_used_by, upload_files.used_at AS upload_files_used_at, upload_files.hash AS upload_files_hash, upload_files.source_url AS upload_files_source_url | |||||
| FROM upload_files | |||||
| WHERE upload_files.id IN (%(primary_keys_1)s::UUID) | |||||
| 2025-08-21 15:34:49,587 INFO sqlalchemy.engine.Engine [generated in 0.00012s] {'primary_keys_1': '3cdec641-a452-4df0-a9af-4a1a30c27ea5'} | |||||
| 2025-08-21 15:34:49,588 INFO sqlalchemy.engine.Engine SELECT upload_files.id AS upload_files_id, upload_files.tenant_id AS upload_files_tenant_id, upload_files.storage_type AS upload_files_storage_type, upload_files.key AS upload_files_key, upload_files.name AS upload_files_name, upload_files.size AS upload_files_size, upload_files.extension AS upload_files_extension, upload_files.mime_type AS upload_files_mime_type, upload_files.created_by_role AS upload_files_created_by_role, upload_files.created_by AS upload_files_created_by, upload_files.created_at AS upload_files_created_at, upload_files.used AS upload_files_used, upload_files.used_by AS upload_files_used_by, upload_files.used_at AS upload_files_used_at, upload_files.hash AS upload_files_hash, upload_files.source_url AS upload_files_source_url | |||||
| FROM upload_files | |||||
| WHERE upload_files.id IN (%(primary_keys_1)s::UUID) | |||||
| 2025-08-21 15:34:49,588 INFO sqlalchemy.engine.Engine [generated in 0.00010s] {'primary_keys_1': '366621fa-4326-403e-8709-62e4d0de7367'} | |||||
| """ | |||||
| """ | |||||
| upload_file_id: 366621fa-4326-403e-8709-62e4d0de7367 3cdec641-a452-4df0-a9af-4a1a30c27ea5 | |||||
| workflow_node_executions_id: 9aac28b6-b6fc-4aea-abdf-21da3227e621 | |||||
| offload_id: 0198cd44-b7ea-724b-9e1b-5f062a2ef45b | |||||
| """ |
@dataclass
class TruncationTestData:
    """Test data for truncation scenarios."""

    name: str
    process_data: dict[str, any]
    should_truncate: bool


class TestProcessDataTruncationIntegration:
    """Integration tests for process_data truncation functionality."""

    @pytest.fixture
    def in_memory_db_engine(self):
        """Create an in-memory SQLite database for testing."""
        engine = create_engine("sqlite:///:memory:")

        # Create minimal table structure for testing
        with engine.connect() as conn:
            # Create workflow_node_executions table
            conn.execute(
                text("""
                CREATE TABLE workflow_node_executions (
                    id TEXT PRIMARY KEY,
                    tenant_id TEXT NOT NULL,
                    created_by TEXT NOT NULL,
                    finished_at DATETIME
                )
            """)
            )

            # Create workflow_node_execution_offload table
            conn.execute(
                text("""
                CREATE TABLE workflow_node_execution_offload (
                    id TEXT PRIMARY KEY,
                    created_at DATETIME NOT NULL,
                    outputs_file_id TEXT,
                    process_data_file_id TEXT
                )
            """)
            )

            # Create upload_files table (simplified)
            conn.execute(
                text("""
                CREATE TABLE upload_files (
                    id TEXT PRIMARY KEY,
                    tenant_id TEXT NOT NULL,
                    size INTEGER NOT NULL,
                    created_at DATETIME NOT NULL
                )
            """)
            )
            conn.commit()

        return engine
    @pytest.fixture
    def repository(self, in_memory_db_engine, mock_account):
        """Create a repository instance for testing."""
        session_factory = sessionmaker(bind=in_memory_db_engine)
        return SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=mock_account,
        )

    def create_test_execution(
        self, process_data: dict[str, any] | None = None, execution_id: str = "test-execution-id"
    ) -> WorkflowNodeExecution:
        """Create a test execution with process_data."""
        return WorkflowNodeExecution(

                "logs": ["log entry"] * 500,  # Large array
                "config": {"setting": "value"},
                "status": "processing",
                "details": {"description": "y" * 5000},  # Large string
            },
            should_truncate=True,
            expected_storage_interaction=True,
        ),
    ]
    @patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
    @patch("services.file_service.FileService.upload_file")
    @patch("extensions.ext_storage.storage")
    def test_end_to_end_process_data_truncation(self, mock_storage, mock_upload_file, mock_config, repository):
        """Test end-to-end process_data truncation functionality."""
        # Configure truncation limits
        mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
        mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
        mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500

        # Create large process_data that should be truncated
        large_process_data = {
            "large_field": "x" * 10000,  # Exceeds string length limit
            "metadata": {"type": "processing", "timestamp": 1234567890},
        }
| # Mock file upload | # Mock file upload | ||||
| mock_file = Mock() | mock_file = Mock() | ||||
| mock_file.id = "mock-process-data-file-id" | mock_file.id = "mock-process-data-file-id" | ||||
| mock_upload_file.return_value = mock_file | mock_upload_file.return_value = mock_file | ||||
| # Create and save execution | # Create and save execution | ||||
| execution = self.create_test_execution(process_data=large_process_data) | execution = self.create_test_execution(process_data=large_process_data) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Verify truncation occurred | # Verify truncation occurred | ||||
| assert execution.process_data_truncated is True | assert execution.process_data_truncated is True | ||||
| truncated_data = execution.get_truncated_process_data() | truncated_data = execution.get_truncated_process_data() | ||||
| assert truncated_data is not None | assert truncated_data is not None | ||||
| assert truncated_data != large_process_data # Should be different due to truncation | assert truncated_data != large_process_data # Should be different due to truncation | ||||
| # Verify file upload was called for process_data | # Verify file upload was called for process_data | ||||
| assert mock_upload_file.called | assert mock_upload_file.called | ||||
| upload_args = mock_upload_file.call_args | upload_args = mock_upload_file.call_args | ||||
| assert "_process_data" in upload_args[1]["filename"] | assert "_process_data" in upload_args[1]["filename"] | ||||
| @patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config') | |||||
| @patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config") | |||||
| def test_small_process_data_no_truncation(self, mock_config, repository): | def test_small_process_data_no_truncation(self, mock_config, repository): | ||||
| """Test that small process_data is not truncated.""" | """Test that small process_data is not truncated.""" | ||||
| # Configure truncation limits | # Configure truncation limits | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | ||||
| # Create small process_data | # Create small process_data | ||||
| small_process_data = {"small": "data", "count": 5} | small_process_data = {"small": "data", "count": 5} | ||||
| execution = self.create_test_execution(process_data=small_process_data) | execution = self.create_test_execution(process_data=small_process_data) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Verify no truncation occurred | # Verify no truncation occurred | ||||
| assert execution.process_data_truncated is False | assert execution.process_data_truncated is False | ||||
| assert execution.get_truncated_process_data() is None | assert execution.get_truncated_process_data() is None | ||||
| assert execution.get_response_process_data() == small_process_data | assert execution.get_response_process_data() == small_process_data | ||||
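For orientation, the three WORKFLOW_VARIABLE_TRUNCATION_* limits configured in these tests interact roughly as in the following sketch. The helper name needs_truncation and the inline constants are invented for illustration only; the real decision logic lives in the repository's _truncate_and_upload and may differ in detail.

    # Minimal sketch, assuming truncation triggers on total payload size,
    # over-long strings, or over-long arrays (names invented for illustration).
    import json
    from typing import Any

    MAX_SIZE = 1000       # stands in for WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE
    ARRAY_LENGTH = 100    # stands in for WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH
    STRING_LENGTH = 500   # stands in for WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH

    def needs_truncation(value: dict[str, Any]) -> bool:
        """Return True if the serialized payload or any top-level field exceeds a limit."""
        if len(json.dumps(value)) > MAX_SIZE:
            return True
        for item in value.values():
            if isinstance(item, str) and len(item) > STRING_LENGTH:
                return True
            if isinstance(item, list) and len(item) > ARRAY_LENGTH:
                return True
        return False

    assert needs_truncation({"large_field": "x" * 10000}) is True
    assert needs_truncation({"small": "data", "count": 5}) is False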
| @pytest.mark.parametrize("test_data", [ | |||||
| data for data in get_truncation_test_data(None) | |||||
| ], ids=[data.name for data in get_truncation_test_data(None)]) | |||||
| @patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config') | |||||
| @patch('services.file_service.FileService.upload_file') | |||||
| @pytest.mark.parametrize( | |||||
| "test_data", | |||||
| get_truncation_test_data(None), | |||||
| ids=[data.name for data in get_truncation_test_data(None)], | |||||
| ) | |||||
| @patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config") | |||||
| @patch("services.file_service.FileService.upload_file") | |||||
| def test_various_truncation_scenarios( | def test_various_truncation_scenarios( | ||||
| self, | |||||
| mock_upload_file, | |||||
| mock_config, | |||||
| test_data: TruncationTestData, | |||||
| repository | |||||
| self, mock_upload_file, mock_config, test_data: TruncationTestData, repository | |||||
| ): | ): | ||||
| """Test various process_data truncation scenarios.""" | """Test various process_data truncation scenarios.""" | ||||
| # Configure truncation limits | # Configure truncation limits | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | ||||
| if test_data.expected_storage_interaction: | if test_data.expected_storage_interaction: | ||||
| # Mock file upload for truncation scenarios | # Mock file upload for truncation scenarios | ||||
| mock_file = Mock() | mock_file = Mock() | ||||
| mock_file.id = f"file-{test_data.name}" | mock_file.id = f"file-{test_data.name}" | ||||
| mock_upload_file.return_value = mock_file | mock_upload_file.return_value = mock_file | ||||
| execution = self.create_test_execution(process_data=test_data.process_data) | execution = self.create_test_execution(process_data=test_data.process_data) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Verify truncation behavior matches expectations | # Verify truncation behavior matches expectations | ||||
| assert execution.process_data_truncated == test_data.should_truncate | assert execution.process_data_truncated == test_data.should_truncate | ||||
| if test_data.should_truncate: | if test_data.should_truncate: | ||||
| assert execution.get_truncated_process_data() is not None | assert execution.get_truncated_process_data() is not None | ||||
| assert execution.get_truncated_process_data() != test_data.process_data | assert execution.get_truncated_process_data() != test_data.process_data | ||||
| else: | else: | ||||
| assert execution.get_truncated_process_data() is None | assert execution.get_truncated_process_data() is None | ||||
| assert execution.get_response_process_data() == test_data.process_data | assert execution.get_response_process_data() == test_data.process_data | ||||
| @patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config') | |||||
| @patch('services.file_service.FileService.upload_file') | |||||
| @patch('extensions.ext_storage.storage') | |||||
| @patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config") | |||||
| @patch("services.file_service.FileService.upload_file") | |||||
| @patch("extensions.ext_storage.storage") | |||||
| def test_load_truncated_execution_from_database( | def test_load_truncated_execution_from_database( | ||||
| self, | |||||
| mock_storage, | |||||
| mock_upload_file, | |||||
| mock_config, | |||||
| repository, | |||||
| in_memory_db_engine | |||||
| self, mock_storage, mock_upload_file, mock_config, repository, in_memory_db_engine | |||||
| ): | ): | ||||
| """Test loading an execution with truncated process_data from database.""" | """Test loading an execution with truncated process_data from database.""" | ||||
| # Configure truncation | # Configure truncation | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | ||||
| # Create and save execution with large process_data | # Create and save execution with large process_data | ||||
| large_process_data = { | |||||
| "large_field": "x" * 10000, | |||||
| "metadata": "info" | |||||
| } | |||||
| large_process_data = {"large_field": "x" * 10000, "metadata": "info"} | |||||
| # Mock file upload | # Mock file upload | ||||
| mock_file = Mock() | mock_file = Mock() | ||||
| mock_file.id = "process-data-file-id" | mock_file.id = "process-data-file-id" | ||||
| mock_upload_file.return_value = mock_file | mock_upload_file.return_value = mock_file | ||||
| execution = self.create_test_execution(process_data=large_process_data) | execution = self.create_test_execution(process_data=large_process_data) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Mock storage load for reconstruction | # Mock storage load for reconstruction | ||||
| mock_storage.load.return_value = json.dumps(large_process_data).encode() | mock_storage.load.return_value = json.dumps(large_process_data).encode() | ||||
| # Create a new repository instance to simulate fresh load | # Create a new repository instance to simulate fresh load | ||||
| session_factory = sessionmaker(bind=in_memory_db_engine) | session_factory = sessionmaker(bind=in_memory_db_engine) | ||||
| new_repository = SQLAlchemyWorkflowNodeExecutionRepository( | new_repository = SQLAlchemyWorkflowNodeExecutionRepository( | ||||
| app_id="test-app-id", | app_id="test-app-id", | ||||
| triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | ||||
| ) | ) | ||||
| # Load executions from database | # Load executions from database | ||||
| executions = new_repository.get_by_workflow_run("test-run-id") | executions = new_repository.get_by_workflow_run("test-run-id") | ||||
| assert len(executions) == 1 | assert len(executions) == 1 | ||||
| loaded_execution = executions[0] | loaded_execution = executions[0] | ||||
| # Verify that full data is loaded | # Verify that full data is loaded | ||||
| assert loaded_execution.process_data == large_process_data | assert loaded_execution.process_data == large_process_data | ||||
| assert loaded_execution.process_data_truncated is True | assert loaded_execution.process_data_truncated is True | ||||
| # Verify truncated data for responses | # Verify truncated data for responses | ||||
| response_data = loaded_execution.get_response_process_data() | response_data = loaded_execution.get_response_process_data() | ||||
| assert response_data != large_process_data # Should be truncated version | assert response_data != large_process_data # Should be truncated version | ||||
| """Test handling of None process_data.""" | """Test handling of None process_data.""" | ||||
| execution = self.create_test_execution(process_data=None) | execution = self.create_test_execution(process_data=None) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Should handle None gracefully | # Should handle None gracefully | ||||
| assert execution.process_data is None | assert execution.process_data is None | ||||
| assert execution.process_data_truncated is False | assert execution.process_data_truncated is False | ||||
| """Test handling of empty process_data.""" | """Test handling of empty process_data.""" | ||||
| execution = self.create_test_execution(process_data={}) | execution = self.create_test_execution(process_data={}) | ||||
| repository.save(execution) | repository.save(execution) | ||||
| # Should handle empty dict gracefully | # Should handle empty dict gracefully | ||||
| assert execution.process_data == {} | assert execution.process_data == {} | ||||
| assert execution.process_data_truncated is False | assert execution.process_data_truncated is False | ||||
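The tests above lean on a small set of truncation-aware accessors on the domain entity. The following is a simplified stand-in, not the actual WorkflowNodeExecution implementation; it only illustrates the expected relationship between the full payload, the truncated copy, and the value returned for API responses.

    # Simplified stand-in for the accessors exercised above; assumptions only.
    from dataclasses import dataclass, field
    from typing import Any, Optional

    @dataclass
    class _ExecutionSketch:
        process_data: Optional[dict[str, Any]] = None
        _truncated_process_data: Optional[dict[str, Any]] = field(default=None, repr=False)

        @property
        def process_data_truncated(self) -> bool:
            return self._truncated_process_data is not None

        def set_truncated_process_data(self, value: dict[str, Any]) -> None:
            self._truncated_process_data = value

        def get_truncated_process_data(self) -> Optional[dict[str, Any]]:
            return self._truncated_process_data

        def get_response_process_data(self) -> Optional[dict[str, Any]]:
            # API responses prefer the truncated copy when one exists.
            if self._truncated_process_data is not None:
                return self._truncated_process_data
            return self.process_data

    sketch = _ExecutionSketch(process_data={"large": "x" * 10000})
    sketch.set_truncated_process_data({"large": "[TRUNCATED]"})
    assert sketch.process_data_truncated is True
    assert sketch.get_response_process_data() == {"large": "[TRUNCATED]"}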
| class TestProcessDataTruncationApiIntegration: | class TestProcessDataTruncationApiIntegration: | ||||
| """Integration tests for API responses with process_data truncation.""" | """Integration tests for API responses with process_data truncation.""" | ||||
| def test_api_response_includes_truncated_flag(self): | def test_api_response_includes_truncated_flag(self): | ||||
| """Test that API responses include the process_data_truncated flag.""" | """Test that API responses include the process_data_truncated flag.""" | ||||
| from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter | from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter | ||||
| from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity | from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity | ||||
| from core.app.entities.queue_entities import QueueNodeSucceededEvent | from core.app.entities.queue_entities import QueueNodeSucceededEvent | ||||
| # Create execution with truncated process_data | # Create execution with truncated process_data | ||||
| execution = WorkflowNodeExecution( | execution = WorkflowNodeExecution( | ||||
| id="test-execution-id", | id="test-execution-id", | ||||
| created_at=datetime.now(), | created_at=datetime.now(), | ||||
| finished_at=datetime.now(), | finished_at=datetime.now(), | ||||
| ) | ) | ||||
| # Set truncated data | # Set truncated data | ||||
| execution.set_truncated_process_data({"large": "[TRUNCATED]"}) | execution.set_truncated_process_data({"large": "[TRUNCATED]"}) | ||||
| # Create converter and event | # Create converter and event | ||||
| converter = WorkflowResponseConverter( | converter = WorkflowResponseConverter( | ||||
| application_generate_entity=Mock( | |||||
| spec=WorkflowAppGenerateEntity, | |||||
| app_config=Mock(tenant_id="test-tenant") | |||||
| ) | |||||
| application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")) | |||||
| ) | ) | ||||
| event = QueueNodeSucceededEvent( | event = QueueNodeSucceededEvent( | ||||
| node_id="test-node-id", | node_id="test-node-id", | ||||
| node_type=NodeType.LLM, | node_type=NodeType.LLM, | ||||
| in_iteration_id=None, | in_iteration_id=None, | ||||
| in_loop_id=None, | in_loop_id=None, | ||||
| ) | ) | ||||
| # Generate response | # Generate response | ||||
| response = converter.workflow_node_finish_to_stream_response( | response = converter.workflow_node_finish_to_stream_response( | ||||
| event=event, | event=event, | ||||
| task_id="test-task-id", | task_id="test-task-id", | ||||
| workflow_node_execution=execution, | workflow_node_execution=execution, | ||||
| ) | ) | ||||
| # Verify response includes truncated flag and data | # Verify response includes truncated flag and data | ||||
| assert response is not None | assert response is not None | ||||
| assert response.data.process_data_truncated is True | assert response.data.process_data_truncated is True | ||||
| assert response.data.process_data == {"large": "[TRUNCATED]"} | assert response.data.process_data == {"large": "[TRUNCATED]"} | ||||
| # Verify response can be serialized | # Verify response can be serialized | ||||
| response_dict = response.to_dict() | response_dict = response.to_dict() | ||||
| assert "process_data_truncated" in response_dict["data"] | assert "process_data_truncated" in response_dict["data"] | ||||
| def test_workflow_run_fields_include_truncated_flag(self): | def test_workflow_run_fields_include_truncated_flag(self): | ||||
| """Test that workflow run fields include process_data_truncated.""" | """Test that workflow run fields include process_data_truncated.""" | ||||
| from fields.workflow_run_fields import workflow_run_node_execution_fields | from fields.workflow_run_fields import workflow_run_node_execution_fields | ||||
| # Verify the field is included in the definition | # Verify the field is included in the definition | ||||
| assert "process_data_truncated" in workflow_run_node_execution_fields | assert "process_data_truncated" in workflow_run_node_execution_fields | ||||
| # The field should be a Boolean field | # The field should be a Boolean field | ||||
| field = workflow_run_node_execution_fields["process_data_truncated"] | field = workflow_run_node_execution_fields["process_data_truncated"] | ||||
| from flask_restful import fields | from flask_restful import fields | ||||
| assert isinstance(field, fields.Boolean) | assert isinstance(field, fields.Boolean) | ||||
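The field assertion above relies on standard flask_restful marshalling. A hedged, cut-down illustration of that behaviour follows; the real workflow_run_node_execution_fields mapping is much larger, and only the process_data_truncated flag is shown here.

    # Illustration only: marshalling a record through a Boolean field.
    from flask_restful import fields, marshal

    node_execution_fields = {
        "process_data": fields.Raw,
        "process_data_truncated": fields.Boolean,
    }

    record = {"process_data": {"large": "[TRUNCATED]"}, "process_data_truncated": True}
    assert marshal(record, node_execution_fields)["process_data_truncated"] is True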
| from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter | from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter | ||||
| from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity | from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity | ||||
| from core.app.entities.queue_entities import QueueNodeRetryEvent, QueueNodeSucceededEvent | from core.app.entities.queue_entities import QueueNodeRetryEvent, QueueNodeSucceededEvent | ||||
| from core.helper.code_executor.code_executor import CodeLanguage | |||||
| from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus | from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus | ||||
| from core.workflow.nodes.code.entities import CodeNodeData | |||||
| from core.workflow.nodes.enums import NodeType | |||||
| from core.workflow.enums import NodeType | |||||
| from libs.datetime_utils import naive_utc_now | from libs.datetime_utils import naive_utc_now | ||||
| from models import Account | |||||
| @dataclass | @dataclass | ||||
| def create_workflow_response_converter(self) -> WorkflowResponseConverter: | def create_workflow_response_converter(self) -> WorkflowResponseConverter: | ||||
| """Create a WorkflowResponseConverter for testing.""" | """Create a WorkflowResponseConverter for testing.""" | ||||
| mock_entity = self.create_mock_generate_entity() | mock_entity = self.create_mock_generate_entity() | ||||
| return WorkflowResponseConverter(application_generate_entity=mock_entity) | |||||
| mock_user = Mock(spec=Account) | |||||
| mock_user.id = "test-user-id" | |||||
| mock_user.name = "Test User" | |||||
| mock_user.email = "test@example.com" | |||||
| return WorkflowResponseConverter(application_generate_entity=mock_entity, user=mock_user) | |||||
| def create_workflow_node_execution( | def create_workflow_node_execution( | ||||
| self, | self, | ||||
| return QueueNodeSucceededEvent( | return QueueNodeSucceededEvent( | ||||
| node_id="test-node-id", | node_id="test-node-id", | ||||
| node_type=NodeType.CODE, | node_type=NodeType.CODE, | ||||
| node_data=CodeNodeData( | |||||
| title="test code", | |||||
| variables=[], | |||||
| code_language=CodeLanguage.PYTHON3, | |||||
| code="", | |||||
| outputs={}, | |||||
| ), | |||||
| node_execution_id=str(uuid.uuid4()), | node_execution_id=str(uuid.uuid4()), | ||||
| start_at=naive_utc_now(), | start_at=naive_utc_now(), | ||||
| parallel_id=None, | parallel_id=None, | ||||
| retry_index=1, | retry_index=1, | ||||
| node_id="test-node-id", | node_id="test-node-id", | ||||
| node_type=NodeType.CODE, | node_type=NodeType.CODE, | ||||
| node_data=CodeNodeData( | |||||
| title="test code", | |||||
| variables=[], | |||||
| code_language=CodeLanguage.PYTHON3, | |||||
| code="", | |||||
| outputs={}, | |||||
| ), | |||||
| node_title="test code", | |||||
| provider_type="built-in", | |||||
| provider_id="code", | |||||
| node_execution_id=str(uuid.uuid4()), | node_execution_id=str(uuid.uuid4()), | ||||
| start_at=naive_utc_now(), | start_at=naive_utc_now(), | ||||
| parallel_id=None, | parallel_id=None, | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "scenario", | "scenario", | ||||
| [scenario for scenario in get_process_data_response_scenarios()], | |||||
| get_process_data_response_scenarios(), | |||||
| ids=[scenario.name for scenario in get_process_data_response_scenarios()], | ids=[scenario.name for scenario in get_process_data_response_scenarios()], | ||||
| ) | ) | ||||
| def test_node_finish_response_scenarios(self, scenario: ProcessDataResponseScenario): | def test_node_finish_response_scenarios(self, scenario: ProcessDataResponseScenario): | ||||
| """Test various scenarios for node finish responses.""" | """Test various scenarios for node finish responses.""" | ||||
| mock_user = Mock(spec=Account) | |||||
| mock_user.id = "test-user-id" | |||||
| mock_user.name = "Test User" | |||||
| mock_user.email = "test@example.com" | |||||
| converter = WorkflowResponseConverter( | converter = WorkflowResponseConverter( | ||||
| application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")) | |||||
| application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")), | |||||
| user=mock_user, | |||||
| ) | ) | ||||
| execution = WorkflowNodeExecution( | execution = WorkflowNodeExecution( | ||||
| event = QueueNodeSucceededEvent( | event = QueueNodeSucceededEvent( | ||||
| node_id="test-node-id", | node_id="test-node-id", | ||||
| node_type=NodeType.CODE, | node_type=NodeType.CODE, | ||||
| node_data=CodeNodeData( | |||||
| title="test code", | |||||
| variables=[], | |||||
| code_language=CodeLanguage.PYTHON3, | |||||
| code="", | |||||
| outputs={}, | |||||
| ), | |||||
| node_execution_id=str(uuid.uuid4()), | node_execution_id=str(uuid.uuid4()), | ||||
| start_at=naive_utc_now(), | start_at=naive_utc_now(), | ||||
| parallel_id=None, | parallel_id=None, | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "scenario", | "scenario", | ||||
| [scenario for scenario in get_process_data_response_scenarios()], | |||||
| get_process_data_response_scenarios(), | |||||
| ids=[scenario.name for scenario in get_process_data_response_scenarios()], | ids=[scenario.name for scenario in get_process_data_response_scenarios()], | ||||
| ) | ) | ||||
| def test_node_retry_response_scenarios(self, scenario: ProcessDataResponseScenario): | def test_node_retry_response_scenarios(self, scenario: ProcessDataResponseScenario): | ||||
| """Test various scenarios for node retry responses.""" | """Test various scenarios for node retry responses.""" | ||||
| mock_user = Mock(spec=Account) | |||||
| mock_user.id = "test-user-id" | |||||
| mock_user.name = "Test User" | |||||
| mock_user.email = "test@example.com" | |||||
| converter = WorkflowResponseConverter( | converter = WorkflowResponseConverter( | ||||
| application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")) | |||||
| application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")), | |||||
| user=mock_user, | |||||
| ) | ) | ||||
| execution = WorkflowNodeExecution( | execution = WorkflowNodeExecution( |
| WorkflowNodeExecution, | WorkflowNodeExecution, | ||||
| WorkflowNodeExecutionStatus, | WorkflowNodeExecutionStatus, | ||||
| ) | ) | ||||
| from core.workflow.nodes.enums import NodeType | |||||
| from core.workflow.enums import NodeType | |||||
| from models import Account, WorkflowNodeExecutionTriggeredFrom | from models import Account, WorkflowNodeExecutionTriggeredFrom | ||||
| from models.enums import ExecutionOffLoadType | from models.enums import ExecutionOffLoadType | ||||
| from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload | from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload |
| # Should be resolved to the actual qa_structure schema | # Should be resolved to the actual qa_structure schema | ||||
| assert resolved["type"] == "object" | assert resolved["type"] == "object" | ||||
| assert resolved["title"] == "Q&A Structure Schema" | |||||
| assert resolved["title"] == "Q&A Structure" | |||||
| assert "qa_chunks" in resolved["properties"] | assert "qa_chunks" in resolved["properties"] | ||||
| assert resolved["properties"]["qa_chunks"]["type"] == "array" | assert resolved["properties"]["qa_chunks"]["type"] == "array" | ||||
| # $ref should be resolved | # $ref should be resolved | ||||
| file_schema = resolved["properties"]["file_data"] | file_schema = resolved["properties"]["file_data"] | ||||
| assert file_schema["type"] == "object" | assert file_schema["type"] == "object" | ||||
| assert file_schema["title"] == "File Schema" | |||||
| assert file_schema["title"] == "File" | |||||
| assert "name" in file_schema["properties"] | assert "name" in file_schema["properties"] | ||||
| # Metadata fields should be removed from resolved schema | # Metadata fields should be removed from resolved schema | ||||
| # Items $ref should be resolved | # Items $ref should be resolved | ||||
| items_schema = resolved["items"] | items_schema = resolved["items"] | ||||
| assert items_schema["type"] == "array" | assert items_schema["type"] == "array" | ||||
| assert items_schema["title"] == "General Structure Schema" | |||||
| assert items_schema["title"] == "General Structure" | |||||
| def test_non_dify_ref_unchanged(self): | def test_non_dify_ref_unchanged(self): | ||||
| """Test that non-Dify $refs are left unchanged""" | """Test that non-Dify $refs are left unchanged""" | ||||
| # Dify $ref should be resolved | # Dify $ref should be resolved | ||||
| assert resolved["properties"]["dify_data"]["type"] == "object" | assert resolved["properties"]["dify_data"]["type"] == "object" | ||||
| assert resolved["properties"]["dify_data"]["title"] == "File Schema" | |||||
| assert resolved["properties"]["dify_data"]["title"] == "File" | |||||
| def test_no_refs_schema_unchanged(self): | def test_no_refs_schema_unchanged(self): | ||||
| """Test that schemas without $refs are returned unchanged""" | """Test that schemas without $refs are returned unchanged""" | ||||
| # Check refs are resolved | # Check refs are resolved | ||||
| assert resolved["properties"]["files"]["items"]["type"] == "object" | assert resolved["properties"]["files"]["items"]["type"] == "object" | ||||
| assert resolved["properties"]["files"]["items"]["title"] == "File Schema" | |||||
| assert resolved["properties"]["files"]["items"]["title"] == "File" | |||||
| assert resolved["properties"]["nested"]["properties"]["qa"]["type"] == "object" | assert resolved["properties"]["nested"]["properties"]["qa"]["type"] == "object" | ||||
| assert resolved["properties"]["nested"]["properties"]["qa"]["title"] == "Q&A Structure Schema" | |||||
| assert resolved["properties"]["nested"]["properties"]["qa"]["title"] == "Q&A Structure" | |||||
| class TestUtilityFunctions: | class TestUtilityFunctions: | ||||
| assert isinstance(resolved, list) | assert isinstance(resolved, list) | ||||
| assert len(resolved) == 3 | assert len(resolved) == 3 | ||||
| assert resolved[0]["type"] == "object" | assert resolved[0]["type"] == "object" | ||||
| assert resolved[0]["title"] == "File Schema" | |||||
| assert resolved[0]["title"] == "File" | |||||
| assert resolved[1] == {"type": "string"} | assert resolved[1] == {"type": "string"} | ||||
| assert resolved[2]["type"] == "object" | assert resolved[2]["type"] == "object" | ||||
| assert resolved[2]["title"] == "Q&A Structure Schema" | |||||
| assert resolved[2]["title"] == "Q&A Structure" | |||||
| def test_cache_performance(self): | def test_cache_performance(self): | ||||
| """Test that caching improves performance""" | """Test that caching improves performance""" | ||||
| # Cache should make it faster (more lenient check) | # Cache should make it faster (more lenient check) | ||||
| assert result1 == result2 | assert result1 == result2 | ||||
| # Cache should provide some performance benefit | |||||
| assert avg_time_with_cache <= avg_time_no_cache | |||||
| # Cache should provide some performance benefit (allow for measurement variance): | |||||
| # we expect the cache to be faster, but tolerate small timing variations | |||||
| performance_ratio = avg_time_with_cache / avg_time_no_cache if avg_time_no_cache > 0 else 1.0 | |||||
| assert performance_ratio <= 2.0, f"Cache performance degraded too much: {performance_ratio}" | |||||
| def test_fast_path_performance_no_refs(self): | def test_fast_path_performance_no_refs(self): | ||||
| """Test that schemas without $refs use fast path and avoid deep copying""" | """Test that schemas without $refs use fast path and avoid deep copying""" |
| import pytest | import pytest | ||||
| from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution | from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution | ||||
| from core.workflow.nodes.enums import NodeType | |||||
| from core.workflow.enums import NodeType | |||||
| class TestWorkflowNodeExecutionProcessDataTruncation: | class TestWorkflowNodeExecutionProcessDataTruncation: | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "scenario", | "scenario", | ||||
| [scenario for scenario in get_process_data_scenarios(None)], | |||||
| get_process_data_scenarios(None), | |||||
| ids=[scenario.name for scenario in get_process_data_scenarios(None)], | ids=[scenario.name for scenario in get_process_data_scenarios(None)], | ||||
| ) | ) | ||||
| def test_process_data_scenarios(self, scenario: ProcessDataScenario): | def test_process_data_scenarios(self, scenario: ProcessDataScenario): |
| from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload | from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload | ||||
| class TestWorkflowNodeExecutionOffload: | |||||
| """Test WorkflowNodeExecutionOffload model with process_data fields.""" | |||||
| def test_get_exe(self): | |||||
| WorkflowNodeExecutionOffload | |||||
| class TestWorkflowNodeExecutionModel: | class TestWorkflowNodeExecutionModel: | ||||
| """Test WorkflowNodeExecutionModel with process_data truncation features.""" | """Test WorkflowNodeExecutionModel with process_data truncation features.""" | ||||
| def test_process_data_truncated_property_false_when_no_offload_data(self): | def test_process_data_truncated_property_false_when_no_offload_data(self): | ||||
| """Test process_data_truncated returns False when no offload_data.""" | """Test process_data_truncated returns False when no offload_data.""" | ||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| execution.offload_data = None | |||||
| execution.offload_data = [] | |||||
| assert execution.process_data_truncated is False | assert execution.process_data_truncated is False | ||||
| def test_process_data_truncated_property_false_when_no_process_data_file(self): | def test_process_data_truncated_property_false_when_no_process_data_file(self): | ||||
| """Test process_data_truncated returns False when no process_data file.""" | """Test process_data_truncated returns False when no process_data file.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| # Create real offload instance | |||||
| offload_data = WorkflowNodeExecutionOffload() | |||||
| offload_data.inputs_file_id = "inputs-file" | |||||
| offload_data.outputs_file_id = "outputs-file" | |||||
| offload_data.process_data_file_id = None # No process_data file | |||||
| execution.offload_data = offload_data | |||||
| # Create real offload instances for inputs and outputs but not process_data | |||||
| inputs_offload = WorkflowNodeExecutionOffload() | |||||
| inputs_offload.type_ = ExecutionOffLoadType.INPUTS | |||||
| inputs_offload.file_id = "inputs-file" | |||||
| outputs_offload = WorkflowNodeExecutionOffload() | |||||
| outputs_offload.type_ = ExecutionOffLoadType.OUTPUTS | |||||
| outputs_offload.file_id = "outputs-file" | |||||
| execution.offload_data = [inputs_offload, outputs_offload] | |||||
| assert execution.process_data_truncated is False | assert execution.process_data_truncated is False | ||||
| def test_process_data_truncated_property_true_when_process_data_file_exists(self): | def test_process_data_truncated_property_true_when_process_data_file_exists(self): | ||||
| """Test process_data_truncated returns True when process_data file exists.""" | """Test process_data_truncated returns True when process_data file exists.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| # Create a real offload instance rather than mock | |||||
| offload_data = WorkflowNodeExecutionOffload() | |||||
| offload_data.process_data_file_id = "process-data-file-id" | |||||
| execution.offload_data = offload_data | |||||
| # Create a real offload instance for process_data | |||||
| process_data_offload = WorkflowNodeExecutionOffload() | |||||
| process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA | |||||
| process_data_offload.file_id = "process-data-file-id" | |||||
| execution.offload_data = [process_data_offload] | |||||
| assert execution.process_data_truncated is True | assert execution.process_data_truncated is True | ||||
| def test_load_full_process_data_with_no_offload_data(self): | def test_load_full_process_data_with_no_offload_data(self): | ||||
| """Test load_full_process_data when no offload data exists.""" | """Test load_full_process_data when no offload data exists.""" | ||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| execution.offload_data = None | |||||
| execution.process_data_dict = {"test": "data"} | |||||
| execution.offload_data = [] | |||||
| execution.process_data = '{"test": "data"}' | |||||
| # Mock session and storage | # Mock session and storage | ||||
| mock_session = Mock() | mock_session = Mock() | ||||
| def test_load_full_process_data_with_no_file(self): | def test_load_full_process_data_with_no_file(self): | ||||
| """Test load_full_process_data when no process_data file exists.""" | """Test load_full_process_data when no process_data file exists.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| execution.offload_data = self.create_mock_offload_data(process_data_file_id=None) | |||||
| execution.process_data_dict = {"test": "data"} | |||||
| # Create offload data for inputs only, not process_data | |||||
| inputs_offload = WorkflowNodeExecutionOffload() | |||||
| inputs_offload.type_ = ExecutionOffLoadType.INPUTS | |||||
| inputs_offload.file_id = "inputs-file" | |||||
| execution.offload_data = [inputs_offload] | |||||
| execution.process_data = '{"test": "data"}' | |||||
| # Mock session and storage | # Mock session and storage | ||||
| mock_session = Mock() | mock_session = Mock() | ||||
| def test_load_full_process_data_with_file(self): | def test_load_full_process_data_with_file(self): | ||||
| """Test load_full_process_data when process_data file exists.""" | """Test load_full_process_data when process_data file exists.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| offload_data = self.create_mock_offload_data(process_data_file_id="file-id") | |||||
| execution.offload_data = offload_data | |||||
| execution.process_data_dict = {"truncated": "data"} | |||||
| # Create process_data offload | |||||
| process_data_offload = WorkflowNodeExecutionOffload() | |||||
| process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA | |||||
| process_data_offload.file_id = "file-id" | |||||
| execution.offload_data = [process_data_offload] | |||||
| execution.process_data = '{"truncated": "data"}' | |||||
| # Mock session and storage | # Mock session and storage | ||||
| mock_session = Mock() | mock_session = Mock() | ||||
| def test_consistency_with_inputs_outputs_truncation(self): | def test_consistency_with_inputs_outputs_truncation(self): | ||||
| """Test that process_data truncation behaves consistently with inputs/outputs.""" | """Test that process_data truncation behaves consistently with inputs/outputs.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| # Test all three truncation properties together | |||||
| offload_data = self.create_mock_offload_data( | |||||
| inputs_file_id="inputs-file", outputs_file_id="outputs-file", process_data_file_id="process-data-file" | |||||
| ) | |||||
| execution.offload_data = offload_data | |||||
| # Create offload data for all three types | |||||
| inputs_offload = WorkflowNodeExecutionOffload() | |||||
| inputs_offload.type_ = ExecutionOffLoadType.INPUTS | |||||
| inputs_offload.file_id = "inputs-file" | |||||
| outputs_offload = WorkflowNodeExecutionOffload() | |||||
| outputs_offload.type_ = ExecutionOffLoadType.OUTPUTS | |||||
| outputs_offload.file_id = "outputs-file" | |||||
| # All should be truncated | |||||
| process_data_offload = WorkflowNodeExecutionOffload() | |||||
| process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA | |||||
| process_data_offload.file_id = "process-data-file" | |||||
| execution.offload_data = [inputs_offload, outputs_offload, process_data_offload] | |||||
| # All three should be truncated | |||||
| assert execution.inputs_truncated is True | assert execution.inputs_truncated is True | ||||
| assert execution.outputs_truncated is True | assert execution.outputs_truncated is True | ||||
| assert execution.process_data_truncated is True | assert execution.process_data_truncated is True | ||||
| def test_mixed_truncation_states(self): | def test_mixed_truncation_states(self): | ||||
| """Test mixed states of truncation.""" | """Test mixed states of truncation.""" | ||||
| from models.enums import ExecutionOffLoadType | |||||
| execution = WorkflowNodeExecutionModel() | execution = WorkflowNodeExecutionModel() | ||||
| # Only process_data is truncated | # Only process_data is truncated | ||||
| offload_data = self.create_mock_offload_data( | |||||
| inputs_file_id=None, outputs_file_id=None, process_data_file_id="process-data-file" | |||||
| ) | |||||
| execution.offload_data = offload_data | |||||
| process_data_offload = WorkflowNodeExecutionOffload() | |||||
| process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA | |||||
| process_data_offload.file_id = "process-data-file" | |||||
| execution.offload_data = [process_data_offload] | |||||
| assert execution.inputs_truncated is False | assert execution.inputs_truncated is False | ||||
| assert execution.outputs_truncated is False | assert execution.outputs_truncated is False |
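These model tests exercise the list-based offload_data relationship, where each entry carries a type_ and a file_id. The following standalone approximation shows how the *_truncated properties could be derived from such a list; the real WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload, and ExecutionOffLoadType live in models/ and may be implemented differently.

    # Rough sketch (assumptions flagged) of deriving truncation flags from a
    # list of offload records keyed by type.
    from dataclasses import dataclass, field
    from enum import StrEnum
    from typing import Optional

    class OffLoadType(StrEnum):          # stand-in for models.enums.ExecutionOffLoadType
        INPUTS = "inputs"
        OUTPUTS = "outputs"
        PROCESS_DATA = "process_data"

    @dataclass
    class Offload:                        # stand-in for WorkflowNodeExecutionOffload
        type_: OffLoadType
        file_id: Optional[str] = None

    @dataclass
    class ExecutionModelSketch:
        offload_data: list[Offload] = field(default_factory=list)

        def _has(self, type_: OffLoadType) -> bool:
            return any(o.type_ == type_ and o.file_id for o in self.offload_data)

        @property
        def inputs_truncated(self) -> bool:
            return self._has(OffLoadType.INPUTS)

        @property
        def outputs_truncated(self) -> bool:
            return self._has(OffLoadType.OUTPUTS)

        @property
        def process_data_truncated(self) -> bool:
            return self._has(OffLoadType.PROCESS_DATA)

    m = ExecutionModelSketch(offload_data=[Offload(OffLoadType.PROCESS_DATA, "file-id")])
    assert m.process_data_truncated and not m.inputs_truncated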
| import json | import json | ||||
| from dataclasses import dataclass | from dataclasses import dataclass | ||||
| from datetime import datetime | from datetime import datetime | ||||
| from typing import Any | |||||
| from unittest.mock import MagicMock, Mock, patch | from unittest.mock import MagicMock, Mock, patch | ||||
| import pytest | import pytest | ||||
| _InputsOutputsTruncationResult, | _InputsOutputsTruncationResult, | ||||
| ) | ) | ||||
| from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution | from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution | ||||
| from core.workflow.nodes.enums import NodeType | |||||
| from core.workflow.enums import NodeType | |||||
| from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom | from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom | ||||
| from models.model import UploadFile | from models.model import UploadFile | ||||
| from models.workflow import WorkflowNodeExecutionOffload | from models.workflow import WorkflowNodeExecutionOffload | ||||
| """Create a repository instance for testing.""" | """Create a repository instance for testing.""" | ||||
| mock_account = self.create_mock_account() | mock_account = self.create_mock_account() | ||||
| mock_session_factory = self.create_mock_session_factory() | mock_session_factory = self.create_mock_session_factory() | ||||
| repository = SQLAlchemyWorkflowNodeExecutionRepository( | repository = SQLAlchemyWorkflowNodeExecutionRepository( | ||||
| session_factory=mock_session_factory, | session_factory=mock_session_factory, | ||||
| user=mock_account, | user=mock_account, | ||||
| app_id="test-app-id", | app_id="test-app-id", | ||||
| triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | ||||
| ) | ) | ||||
| if mock_file_service: | if mock_file_service: | ||||
| repository._file_service = mock_file_service | repository._file_service = mock_file_service | ||||
| return repository | return repository | ||||
| def create_workflow_node_execution( | def create_workflow_node_execution( | ||||
| self, | self, | ||||
| process_data: dict[str, any] | None = None, | |||||
| process_data: dict[str, Any] | None = None, | |||||
| execution_id: str = "test-execution-id", | execution_id: str = "test-execution-id", | ||||
| ) -> WorkflowNodeExecution: | ) -> WorkflowNodeExecution: | ||||
| """Create a WorkflowNodeExecution instance for testing.""" | """Create a WorkflowNodeExecution instance for testing.""" | ||||
| created_at=datetime.now(), | created_at=datetime.now(), | ||||
| ) | ) | ||||
| @patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config') | |||||
| @patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config") | |||||
| def test_to_db_model_with_small_process_data(self, mock_config): | def test_to_db_model_with_small_process_data(self, mock_config): | ||||
| """Test _to_db_model with small process_data that doesn't need truncation.""" | """Test _to_db_model with small process_data that doesn't need truncation.""" | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 | ||||
| mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 | ||||
| repository = self.create_repository() | repository = self.create_repository() | ||||
| small_process_data = {"small": "data", "count": 5} | small_process_data = {"small": "data", "count": 5} | ||||
| execution = self.create_workflow_node_execution(process_data=small_process_data) | execution = self.create_workflow_node_execution(process_data=small_process_data) | ||||
| with patch.object(repository, '_truncate_and_upload', return_value=None) as mock_truncate: | |||||
| with patch.object(repository, "_truncate_and_upload", return_value=None) as mock_truncate: | |||||
| db_model = repository._to_db_model(execution) | db_model = repository._to_db_model(execution) | ||||
| # Should try to truncate but return None (no truncation needed) | # Should try to truncate but return None (no truncation needed) | ||||
| mock_truncate.assert_called_once_with( | |||||
| small_process_data, | |||||
| execution.id, | |||||
| "_process_data" | |||||
| ) | |||||
| mock_truncate.assert_called_once_with(small_process_data, execution.id, "_process_data") | |||||
| # Process data should be stored directly in database | # Process data should be stored directly in database | ||||
| assert db_model.process_data is not None | assert db_model.process_data is not None | ||||
| stored_data = json.loads(db_model.process_data) | stored_data = json.loads(db_model.process_data) | ||||
| assert stored_data == small_process_data | assert stored_data == small_process_data | ||||
| # No offload data should be created for process_data | # No offload data should be created for process_data | ||||
| assert db_model.offload_data is None | assert db_model.offload_data is None | ||||
| def test_to_db_model_with_large_process_data(self): | def test_to_db_model_with_large_process_data(self): | ||||
| """Test _to_db_model with large process_data that needs truncation.""" | """Test _to_db_model with large process_data that needs truncation.""" | ||||
| repository = self.create_repository() | repository = self.create_repository() | ||||
| # Create large process_data that would need truncation | # Create large process_data that would need truncation | ||||
| large_process_data = { | large_process_data = { | ||||
| "large_field": "x" * 10000, # Very large string | "large_field": "x" * 10000, # Very large string | ||||
| "metadata": {"type": "processing", "timestamp": 1234567890} | |||||
| "metadata": {"type": "processing", "timestamp": 1234567890}, | |||||
| } | } | ||||
| # Mock truncation result | # Mock truncation result | ||||
| truncated_data = { | |||||
| "large_field": "[TRUNCATED]", | |||||
| "metadata": {"type": "processing", "timestamp": 1234567890} | |||||
| } | |||||
| truncated_data = {"large_field": "[TRUNCATED]", "metadata": {"type": "processing", "timestamp": 1234567890}} | |||||
| mock_upload_file = Mock(spec=UploadFile) | mock_upload_file = Mock(spec=UploadFile) | ||||
| mock_upload_file.id = "mock-file-id" | mock_upload_file.id = "mock-file-id" | ||||
| mock_offload = Mock(spec=WorkflowNodeExecutionOffload) | |||||
| truncation_result = _InputsOutputsTruncationResult( | truncation_result = _InputsOutputsTruncationResult( | ||||
| truncated_value=truncated_data, | |||||
| file=mock_upload_file | |||||
| truncated_value=truncated_data, file=mock_upload_file, offload=mock_offload | |||||
| ) | ) | ||||
| execution = self.create_workflow_node_execution(process_data=large_process_data) | execution = self.create_workflow_node_execution(process_data=large_process_data) | ||||
| with patch.object(repository, '_truncate_and_upload', return_value=truncation_result) as mock_truncate: | |||||
| with patch.object(repository, "_truncate_and_upload", return_value=truncation_result) as mock_truncate: | |||||
| db_model = repository._to_db_model(execution) | db_model = repository._to_db_model(execution) | ||||
| # Should call truncate with correct parameters | # Should call truncate with correct parameters | ||||
| mock_truncate.assert_called_once_with( | |||||
| large_process_data, | |||||
| execution.id, | |||||
| "_process_data" | |||||
| ) | |||||
| mock_truncate.assert_called_once_with(large_process_data, execution.id, "_process_data") | |||||
| # Truncated data should be stored in database | # Truncated data should be stored in database | ||||
| assert db_model.process_data is not None | assert db_model.process_data is not None | ||||
| stored_data = json.loads(db_model.process_data) | stored_data = json.loads(db_model.process_data) | ||||
| assert stored_data == truncated_data | assert stored_data == truncated_data | ||||
| # Domain model should have truncated data set | # Domain model should have truncated data set | ||||
| assert execution.process_data_truncated is True | assert execution.process_data_truncated is True | ||||
| assert execution.get_truncated_process_data() == truncated_data | assert execution.get_truncated_process_data() == truncated_data | ||||
| # Offload data should be created | # Offload data should be created | ||||
| assert db_model.offload_data is not None | assert db_model.offload_data is not None | ||||
| assert db_model.offload_data.process_data_file == mock_upload_file | |||||
| assert db_model.offload_data.process_data_file_id == "mock-file-id" | |||||
| assert len(db_model.offload_data) > 0 | |||||
| # Find the process_data offload entry | |||||
| process_data_offload = next( | |||||
| (item for item in db_model.offload_data if hasattr(item, "file_id") and item.file_id == "mock-file-id"), | |||||
| None, | |||||
| ) | |||||
| assert process_data_offload is not None | |||||
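The save-path tests above patch _truncate_and_upload and inspect the resulting row. A minimal sketch of how a save path could consume such a truncation result follows; _InputsOutputsTruncationResult belongs to the repository module, and the TruncationResultSketch and store_process_data names below are invented for illustration.

    # Assumed shape and usage only; the real repository code differs.
    import json
    from dataclasses import dataclass
    from typing import Any, Optional

    @dataclass
    class TruncationResultSketch:
        truncated_value: Any            # stored inline in the DB row
        file: Any                       # upload file holding the full payload
        offload: Any = None             # offload row linking execution to the file

    def store_process_data(full: dict[str, Any], result: Optional[TruncationResultSketch]) -> dict[str, Any]:
        row: dict[str, Any] = {"offload_data": []}
        if result is None:              # small payload: keep it inline
            row["process_data"] = json.dumps(full)
        else:                           # large payload: keep truncated copy, attach offload
            row["process_data"] = json.dumps(result.truncated_value)
            row["offload_data"].append(result.offload)
        return row

    small = store_process_data({"small": "data"}, None)
    assert json.loads(small["process_data"]) == {"small": "data"} and small["offload_data"] == []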
| def test_to_db_model_with_none_process_data(self): | def test_to_db_model_with_none_process_data(self): | ||||
| """Test _to_db_model with None process_data.""" | """Test _to_db_model with None process_data.""" | ||||
| repository = self.create_repository() | repository = self.create_repository() | ||||
| execution = self.create_workflow_node_execution(process_data=None) | execution = self.create_workflow_node_execution(process_data=None) | ||||
| with patch.object(repository, '_truncate_and_upload') as mock_truncate: | |||||
| with patch.object(repository, "_truncate_and_upload") as mock_truncate: | |||||
| db_model = repository._to_db_model(execution) | db_model = repository._to_db_model(execution) | ||||
| # Should not call truncate for None data | # Should not call truncate for None data | ||||
| mock_truncate.assert_not_called() | mock_truncate.assert_not_called() | ||||
| # Process data should be None | # Process data should be None | ||||
| assert db_model.process_data is None | assert db_model.process_data is None | ||||
| # No offload data should be created | # No offload data should be created | ||||
| assert db_model.offload_data is None | |||||
| assert db_model.offload_data == [] | |||||
| def test_to_domain_model_with_offloaded_process_data(self): | def test_to_domain_model_with_offloaded_process_data(self): | ||||
| """Test _to_domain_model with offloaded process_data.""" | """Test _to_domain_model with offloaded process_data.""" | ||||
| repository = self.create_repository() | repository = self.create_repository() | ||||
| # Create mock database model with offload data | # Create mock database model with offload data | ||||
| db_model = Mock(spec=WorkflowNodeExecutionModel) | db_model = Mock(spec=WorkflowNodeExecutionModel) | ||||
| db_model.id = "test-execution-id" | db_model.id = "test-execution-id" | ||||
| db_model.elapsed_time = 1.5 | db_model.elapsed_time = 1.5 | ||||
| db_model.created_at = datetime.now() | db_model.created_at = datetime.now() | ||||
| db_model.finished_at = None | db_model.finished_at = None | ||||
| # Mock truncated process_data from database | # Mock truncated process_data from database | ||||
| truncated_process_data = {"large_field": "[TRUNCATED]", "metadata": "info"} | truncated_process_data = {"large_field": "[TRUNCATED]", "metadata": "info"} | ||||
| db_model.process_data_dict = truncated_process_data | db_model.process_data_dict = truncated_process_data | ||||
| db_model.inputs_dict = None | db_model.inputs_dict = None | ||||
| db_model.outputs_dict = None | db_model.outputs_dict = None | ||||
| db_model.execution_metadata_dict = {} | db_model.execution_metadata_dict = {} | ||||
| # Mock offload data with process_data file | # Mock offload data with process_data file | ||||
| mock_offload_data = Mock(spec=WorkflowNodeExecutionOffload) | mock_offload_data = Mock(spec=WorkflowNodeExecutionOffload) | ||||
| mock_offload_data.inputs_file_id = None | mock_offload_data.inputs_file_id = None | ||||
| mock_offload_data.outputs_file_id = None | mock_offload_data.outputs_file_id = None | ||||
| mock_offload_data.outputs_file = None | mock_offload_data.outputs_file = None | ||||
| mock_offload_data.process_data_file_id = "process-data-file-id" | mock_offload_data.process_data_file_id = "process-data-file-id" | ||||
| mock_process_data_file = Mock(spec=UploadFile) | mock_process_data_file = Mock(spec=UploadFile) | ||||
| mock_offload_data.process_data_file = mock_process_data_file | mock_offload_data.process_data_file = mock_process_data_file | ||||
| db_model.offload_data = mock_offload_data | |||||
| db_model.offload_data = [mock_offload_data] | |||||
| # Mock the file loading | # Mock the file loading | ||||
| original_process_data = { | |||||
| "large_field": "x" * 10000, | |||||
| "metadata": "info" | |||||
| } | |||||
| with patch.object(repository, '_load_file', return_value=original_process_data) as mock_load: | |||||
| original_process_data = {"large_field": "x" * 10000, "metadata": "info"} | |||||
| with patch.object(repository, "_load_file", return_value=original_process_data) as mock_load: | |||||
| domain_model = repository._to_domain_model(db_model) | domain_model = repository._to_domain_model(db_model) | ||||
| # Should load the file | # Should load the file | ||||
| mock_load.assert_called_once_with(mock_process_data_file) | mock_load.assert_called_once_with(mock_process_data_file) | ||||
| # Domain model should have original data | # Domain model should have original data | ||||
| assert domain_model.process_data == original_process_data | assert domain_model.process_data == original_process_data | ||||
| # Domain model should have truncated data set | # Domain model should have truncated data set | ||||
| assert domain_model.process_data_truncated is True | assert domain_model.process_data_truncated is True | ||||
| assert domain_model.get_truncated_process_data() == truncated_process_data | assert domain_model.get_truncated_process_data() == truncated_process_data | ||||
| def test_to_domain_model_without_offload_data(self): | def test_to_domain_model_without_offload_data(self): | ||||
| """Test _to_domain_model without offload data.""" | """Test _to_domain_model without offload data.""" | ||||
| repository = self.create_repository() | repository = self.create_repository() | ||||
| # Create mock database model without offload data | # Create mock database model without offload data | ||||
| db_model = Mock(spec=WorkflowNodeExecutionModel) | db_model = Mock(spec=WorkflowNodeExecutionModel) | ||||
| db_model.id = "test-execution-id" | db_model.id = "test-execution-id" | ||||
| db_model.elapsed_time = 1.5 | db_model.elapsed_time = 1.5 | ||||
| db_model.created_at = datetime.now() | db_model.created_at = datetime.now() | ||||
| db_model.finished_at = None | db_model.finished_at = None | ||||
| process_data = {"normal": "data"} | process_data = {"normal": "data"} | ||||
| db_model.process_data_dict = process_data | db_model.process_data_dict = process_data | ||||
| db_model.inputs_dict = None | db_model.inputs_dict = None | ||||
| db_model.outputs_dict = None | db_model.outputs_dict = None | ||||
| db_model.execution_metadata_dict = {} | db_model.execution_metadata_dict = {} | ||||
| db_model.offload_data = None | db_model.offload_data = None | ||||
| domain_model = repository._to_domain_model(db_model) | domain_model = repository._to_domain_model(db_model) | ||||
| # Domain model should have the data from database | # Domain model should have the data from database | ||||
| assert domain_model.process_data == process_data | assert domain_model.process_data == process_data | ||||
| # Should not be truncated | # Should not be truncated | ||||
| assert domain_model.process_data_truncated is False | assert domain_model.process_data_truncated is False | ||||
| assert domain_model.get_truncated_process_data() is None | assert domain_model.get_truncated_process_data() is None | ||||
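The two load-side tests above check that the DB row keeps the truncated JSON while the full payload is restored from storage, so the domain object can expose both. A load-side sketch follows; the storage layout, file id, and load_process_data name are invented here, and the real code goes through the repository's _load_file and the storage extension.

    # Load-side sketch under stated assumptions.
    import json
    from typing import Any, Optional

    fake_storage = {
        "process-data-file-id": json.dumps({"large_field": "x" * 10000, "metadata": "info"}).encode()
    }

    def load_process_data(row_json: str, file_id: Optional[str]) -> tuple[dict[str, Any], Optional[dict[str, Any]]]:
        stored = json.loads(row_json)
        if file_id is None:
            return stored, None                        # nothing was offloaded
        full = json.loads(fake_storage[file_id])       # restore the original payload
        return full, stored                            # stored copy becomes the truncated view

    full, truncated = load_process_data('{"large_field": "[TRUNCATED]", "metadata": "info"}', "process-data-file-id")
    assert len(full["large_field"]) == 10000 and truncated["large_field"] == "[TRUNCATED]"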
| @dataclass | @dataclass | ||||
| class TruncationScenario: | class TruncationScenario: | ||||
| """Test scenario for truncation functionality.""" | """Test scenario for truncation functionality.""" | ||||
| name: str | name: str | ||||
| process_data: dict[str, any] | None | |||||
| process_data: dict[str, Any] | None | |||||
| should_truncate: bool | should_truncate: bool | ||||
| expected_truncated: bool = False | expected_truncated: bool = False | ||||
| class TestProcessDataTruncationScenarios: | class TestProcessDataTruncationScenarios: | ||||
| """Test various scenarios for process_data truncation.""" | """Test various scenarios for process_data truncation.""" | ||||
| def get_truncation_scenarios(self) -> list[TruncationScenario]: | def get_truncation_scenarios(self) -> list[TruncationScenario]: | ||||
| """Create test scenarios for truncation.""" | """Create test scenarios for truncation.""" | ||||
| return [ | return [ | ||||
| should_truncate=False, | should_truncate=False, | ||||
| ), | ), | ||||
| ] | ] | ||||
| @pytest.mark.parametrize("scenario", [ | |||||
| scenario for scenario in get_truncation_scenarios(None) | |||||
| ], ids=[scenario.name for scenario in get_truncation_scenarios(None)]) | |||||
| @pytest.mark.parametrize( | |||||
| "scenario", | |||||
| [ | |||||
| TruncationScenario("none_data", None, False, False), | |||||
| TruncationScenario("small_data", {"small": "data"}, False, False), | |||||
| TruncationScenario("large_data", {"large": "x" * 10000}, True, True), | |||||
| TruncationScenario("empty_data", {}, False, False), | |||||
| ], | |||||
| ids=["none_data", "small_data", "large_data", "empty_data"], | |||||
| ) | |||||
| def test_process_data_truncation_scenarios(self, scenario: TruncationScenario): | def test_process_data_truncation_scenarios(self, scenario: TruncationScenario): | ||||
| """Test various process_data truncation scenarios.""" | """Test various process_data truncation scenarios.""" | ||||
| repository = SQLAlchemyWorkflowNodeExecutionRepository( | repository = SQLAlchemyWorkflowNodeExecutionRepository( | ||||
| app_id="test-app", | app_id="test-app", | ||||
| triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, | ||||
| ) | ) | ||||
| execution = WorkflowNodeExecution( | execution = WorkflowNodeExecution( | ||||
| id="test-execution-id", | id="test-execution-id", | ||||
| workflow_id="test-workflow-id", | workflow_id="test-workflow-id", | ||||
| process_data=scenario.process_data, | process_data=scenario.process_data, | ||||
| created_at=datetime.now(), | created_at=datetime.now(), | ||||
| ) | ) | ||||
| # Mock truncation behavior | # Mock truncation behavior | ||||
| if scenario.should_truncate: | if scenario.should_truncate: | ||||
| truncated_data = {"truncated": True} | truncated_data = {"truncated": True} | ||||
| mock_file = Mock(spec=UploadFile, id="file-id") | mock_file = Mock(spec=UploadFile, id="file-id") | ||||
| mock_offload = Mock(spec=WorkflowNodeExecutionOffload) | |||||
| truncation_result = _InputsOutputsTruncationResult( | truncation_result = _InputsOutputsTruncationResult( | ||||
| truncated_value=truncated_data, | |||||
| file=mock_file | |||||
| truncated_value=truncated_data, file=mock_file, offload=mock_offload | |||||
| ) | ) | ||||
| with patch.object(repository, '_truncate_and_upload', return_value=truncation_result): | |||||
| with patch.object(repository, "_truncate_and_upload", return_value=truncation_result): | |||||
| db_model = repository._to_db_model(execution) | db_model = repository._to_db_model(execution) | ||||
| # Should create offload data | # Should create offload data | ||||
| assert db_model.offload_data is not None | assert db_model.offload_data is not None | ||||
| assert db_model.offload_data.process_data_file_id == "file-id" | |||||
| assert len(db_model.offload_data) > 0 | |||||
| # Find the process_data offload entry | |||||
| process_data_offload = next( | |||||
| (item for item in db_model.offload_data if hasattr(item, "file_id") and item.file_id == "file-id"), | |||||
| None, | |||||
| ) | |||||
| assert process_data_offload is not None | |||||
| assert execution.process_data_truncated == scenario.expected_truncated | assert execution.process_data_truncated == scenario.expected_truncated | ||||
| else: | else: | ||||
| with patch.object(repository, '_truncate_and_upload', return_value=None): | |||||
| with patch.object(repository, "_truncate_and_upload", return_value=None): | |||||
| db_model = repository._to_db_model(execution) | db_model = repository._to_db_model(execution) | ||||
| # Should not create offload data or set truncation | # Should not create offload data or set truncation | ||||
| if scenario.process_data is None: | if scenario.process_data is None: | ||||
| assert db_model.offload_data is None | |||||
| assert db_model.offload_data == [] | |||||
| assert db_model.process_data is None | assert db_model.process_data is None | ||||
| else: | else: | ||||
| # For small data, might have offload_data from other fields but not process_data | # For small data, might have offload_data from other fields but not process_data | ||||
| if db_model.offload_data: | if db_model.offload_data: | ||||
| assert db_model.offload_data.process_data_file_id is None | |||||
| assert db_model.offload_data.process_data_file is None | |||||
| assert execution.process_data_truncated is False | |||||
| # Check that no process_data offload entries exist | |||||
| process_data_offloads = [ | |||||
| item | |||||
| for item in db_model.offload_data | |||||
| if hasattr(item, "type_") and item.type_.value == "process_data" | |||||
| ] | |||||
| assert len(process_data_offloads) == 0 | |||||
| assert execution.process_data_truncated is False |
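Reviewer note: the updated assertions above treat offload_data as a list of WorkflowNodeExecutionOffload entries rather than a single object with per-field file ids. A minimal helper sketch of the lookup the test inlines, assuming only the file_id and type_ attributes the assertions actually touch:

    def find_offload_entry(offload_entries, kind="process_data"):
        # Return the first offload record whose type_ matches, or None if that
        # field was not offloaded (attribute names mirror the test's assertions).
        return next(
            (e for e in offload_entries if getattr(e, "type_", None) is not None and e.type_.value == kind),
            None,
        )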
| StringSegment, | StringSegment, | ||||
| ) | ) | ||||
| from services.variable_truncator import ( | from services.variable_truncator import ( | ||||
| ARRAY_CHAR_LIMIT, | |||||
| LARGE_VARIABLE_THRESHOLD, | |||||
| OBJECT_CHAR_LIMIT, | |||||
| MaxDepthExceededError, | MaxDepthExceededError, | ||||
| TruncationResult, | TruncationResult, | ||||
| UnknownTypeError, | UnknownTypeError, | ||||
| assert VariableTruncator.calculate_json_size("") == 2 # Just quotes | assert VariableTruncator.calculate_json_size("") == 2 # Just quotes | ||||
| # Unicode string | # Unicode string | ||||
| unicode_text = "你好" | |||||
| expected_size = len(unicode_text.encode("utf-8")) + 2 | |||||
| assert VariableTruncator.calculate_json_size(unicode_text) == expected_size | |||||
| assert VariableTruncator.calculate_json_size("你好") == 4 | |||||
| def test_number_size_calculation(self, truncator): | def test_number_size_calculation(self, truncator): | ||||
| """Test JSON size calculation for numbers.""" | """Test JSON size calculation for numbers.""" | ||||
| # Create deeply nested structure | # Create deeply nested structure | ||||
| nested: dict[str, Any] = {"level": 0} | nested: dict[str, Any] = {"level": 0} | ||||
| current = nested | current = nested | ||||
| for i in range(25): # Create deep nesting | |||||
| for i in range(105): # Create deep nesting | |||||
| current["next"] = {"level": i + 1} | current["next"] = {"level": i + 1} | ||||
| current = current["next"] | current = current["next"] | ||||
| class TestStringTruncation: | class TestStringTruncation: | ||||
| LENGTH_LIMIT = 10 | |||||
| """Test string truncation functionality.""" | """Test string truncation functionality.""" | ||||
| @pytest.fixture | @pytest.fixture | ||||
| def test_short_string_no_truncation(self, small_truncator): | def test_short_string_no_truncation(self, small_truncator): | ||||
| """Test that short strings are not truncated.""" | """Test that short strings are not truncated.""" | ||||
| short_str = "hello" | short_str = "hello" | ||||
| result, was_truncated = small_truncator._truncate_string(short_str) | |||||
| assert result == short_str | |||||
| assert was_truncated is False | |||||
| result = small_truncator._truncate_string(short_str, self.LENGTH_LIMIT) | |||||
| assert result.value == short_str | |||||
| assert result.truncated is False | |||||
| assert result.value_size == VariableTruncator.calculate_json_size(short_str) | |||||
| def test_long_string_truncation(self, small_truncator: VariableTruncator): | def test_long_string_truncation(self, small_truncator: VariableTruncator): | ||||
| """Test that long strings are truncated with ellipsis.""" | """Test that long strings are truncated with ellipsis.""" | ||||
| long_str = "this is a very long string that exceeds the limit" | long_str = "this is a very long string that exceeds the limit" | ||||
| result, was_truncated = small_truncator._truncate_string(long_str) | |||||
| result = small_truncator._truncate_string(long_str, self.LENGTH_LIMIT) | |||||
| assert was_truncated is True | |||||
| assert result == long_str[:7] + "..." | |||||
| assert len(result) == 10 # 10 chars + "..." | |||||
| assert result.truncated is True | |||||
| assert result.value == long_str[:5] + "..." | |||||
| assert result.value_size == 10  # "this " + "..." = 8 chars, plus 2 for the JSON quotes | |||||
| def test_exact_limit_string(self, small_truncator): | |||||
| def test_exact_limit_string(self, small_truncator: VariableTruncator): | |||||
| """Test string exactly at limit.""" | """Test string exactly at limit.""" | ||||
| exact_str = "1234567890" # Exactly 10 chars | exact_str = "1234567890" # Exactly 10 chars | ||||
| result, was_truncated = small_truncator._truncate_string(exact_str) | |||||
| assert result == exact_str | |||||
| assert was_truncated is False | |||||
| result = small_truncator._truncate_string(exact_str, self.LENGTH_LIMIT) | |||||
| assert result.value == "12345..." | |||||
| assert result.truncated is True | |||||
| assert result.value_size == 10 | |||||
| class TestArrayTruncation: | class TestArrayTruncation: | ||||
| def small_truncator(self): | def small_truncator(self): | ||||
| return VariableTruncator(array_element_limit=3, max_size_bytes=100) | return VariableTruncator(array_element_limit=3, max_size_bytes=100) | ||||
| def test_small_array_no_truncation(self, small_truncator): | |||||
| def test_small_array_no_truncation(self, small_truncator: VariableTruncator): | |||||
| """Test that small arrays are not truncated.""" | """Test that small arrays are not truncated.""" | ||||
| small_array = [1, 2] | small_array = [1, 2] | ||||
| result, was_truncated = small_truncator._truncate_array(small_array, 1000) | |||||
| assert result == small_array | |||||
| assert was_truncated is False | |||||
| result = small_truncator._truncate_array(small_array, 1000) | |||||
| assert result.value == small_array | |||||
| assert result.truncated is False | |||||
| def test_array_element_limit_truncation(self, small_truncator): | |||||
| def test_array_element_limit_truncation(self, small_truncator: VariableTruncator): | |||||
| """Test that arrays over element limit are truncated.""" | """Test that arrays over element limit are truncated.""" | ||||
| large_array = [1, 2, 3, 4, 5, 6] # Exceeds limit of 3 | large_array = [1, 2, 3, 4, 5, 6] # Exceeds limit of 3 | ||||
| result, was_truncated = small_truncator._truncate_array(large_array, 1000) | |||||
| result = small_truncator._truncate_array(large_array, 1000) | |||||
| assert was_truncated is True | |||||
| assert len(result) == 3 | |||||
| assert result == [1, 2, 3] | |||||
| assert result.truncated is True | |||||
| assert result.value == [1, 2, 3] | |||||
| def test_array_size_budget_truncation(self, small_truncator): | |||||
| def test_array_size_budget_truncation(self, small_truncator: VariableTruncator): | |||||
| """Test array truncation due to size budget constraints.""" | """Test array truncation due to size budget constraints.""" | ||||
| # Create array with strings that will exceed size budget | # Create array with strings that will exceed size budget | ||||
| large_strings = ["very long string " * 5, "another long string " * 5] | large_strings = ["very long string " * 5, "another long string " * 5] | ||||
| result, was_truncated = small_truncator._truncate_array(large_strings, 50) | |||||
| result = small_truncator._truncate_array(large_strings, 50) | |||||
| assert was_truncated is True | |||||
| assert result.truncated is True | |||||
| # Should have truncated the strings within the array | # Should have truncated the strings within the array | ||||
| for item in result: | |||||
| for item in result.value: | |||||
| assert isinstance(item, str) | assert isinstance(item, str) | ||||
| print(result) | |||||
| assert len(_compact_json_dumps(result).encode()) <= 50 | |||||
| assert VariableTruncator.calculate_json_size(result.value) <= 50 | |||||
| def test_array_with_nested_objects(self, small_truncator): | def test_array_with_nested_objects(self, small_truncator): | ||||
| """Test array truncation with nested objects.""" | """Test array truncation with nested objects.""" | ||||
| {"name": "item2", "data": "more data"}, | {"name": "item2", "data": "more data"}, | ||||
| {"name": "item3", "data": "even more data"}, | {"name": "item3", "data": "even more data"}, | ||||
| ] | ] | ||||
| result, was_truncated = small_truncator._truncate_array(nested_array, 80) | |||||
| result = small_truncator._truncate_array(nested_array, 30) | |||||
| assert isinstance(result, list) | |||||
| assert len(result) <= 3 | |||||
| # Should have processed nested objects appropriately | |||||
| assert isinstance(result.value, list) | |||||
| assert len(result.value) <= 3 | |||||
| for item in result.value: | |||||
| assert isinstance(item, dict) | |||||
| class TestObjectTruncation: | class TestObjectTruncation: | ||||
| def test_small_object_no_truncation(self, small_truncator): | def test_small_object_no_truncation(self, small_truncator): | ||||
| """Test that small objects are not truncated.""" | """Test that small objects are not truncated.""" | ||||
| small_obj = {"a": 1, "b": 2} | small_obj = {"a": 1, "b": 2} | ||||
| result, was_truncated = small_truncator._truncate_object(small_obj, 1000) | |||||
| assert result == small_obj | |||||
| assert was_truncated is False | |||||
| result = small_truncator._truncate_object(small_obj, 1000) | |||||
| assert result.value == small_obj | |||||
| assert result.truncated is False | |||||
| def test_empty_object_no_truncation(self, small_truncator): | def test_empty_object_no_truncation(self, small_truncator): | ||||
| """Test that empty objects are not truncated.""" | """Test that empty objects are not truncated.""" | ||||
| empty_obj = {} | empty_obj = {} | ||||
| result, was_truncated = small_truncator._truncate_object(empty_obj, 100) | |||||
| assert result == empty_obj | |||||
| assert was_truncated is False | |||||
| result = small_truncator._truncate_object(empty_obj, 100) | |||||
| assert result.value == empty_obj | |||||
| assert result.truncated is False | |||||
| def test_object_value_truncation(self, small_truncator): | def test_object_value_truncation(self, small_truncator): | ||||
| """Test object truncation where values are truncated to fit budget.""" | """Test object truncation where values are truncated to fit budget.""" | ||||
| "key2": "another long string " * 10, | "key2": "another long string " * 10, | ||||
| "key3": "third long string " * 10, | "key3": "third long string " * 10, | ||||
| } | } | ||||
| result, was_truncated = small_truncator._truncate_object(obj_with_long_values, 80) | |||||
| result = small_truncator._truncate_object(obj_with_long_values, 80) | |||||
| assert was_truncated is True | |||||
| assert isinstance(result, dict) | |||||
| assert result.truncated is True | |||||
| assert isinstance(result.value, dict) | |||||
| # Keys should be preserved (deterministic order due to sorting) | |||||
| if result: # Only check if result is not empty | |||||
| assert list(result.keys()) == sorted(result.keys()) | |||||
| assert set(result.value.keys()).issubset(obj_with_long_values.keys()) | |||||
| # Values should be truncated if they exist | # Values should be truncated if they exist | ||||
| for key, value in result.items(): | |||||
| for key, value in result.value.items(): | |||||
| if isinstance(value, str): | if isinstance(value, str): | ||||
| original_value = obj_with_long_values[key] | original_value = obj_with_long_values[key] | ||||
| # Value should be same or smaller | # Value should be same or smaller | ||||
| def test_object_key_dropping(self, small_truncator): | def test_object_key_dropping(self, small_truncator): | ||||
| """Test object truncation where keys are dropped due to size constraints.""" | """Test object truncation where keys are dropped due to size constraints.""" | ||||
| large_obj = {f"key{i:02d}": f"value{i}" for i in range(20)} | large_obj = {f"key{i:02d}": f"value{i}" for i in range(20)} | ||||
| result, was_truncated = small_truncator._truncate_object(large_obj, 50) | |||||
| result = small_truncator._truncate_object(large_obj, 50) | |||||
| assert was_truncated is True | |||||
| assert len(result) < len(large_obj) | |||||
| assert result.truncated is True | |||||
| assert len(result.value) < len(large_obj) | |||||
| # Should maintain sorted key order | # Should maintain sorted key order | ||||
| result_keys = list(result.keys()) | |||||
| result_keys = list(result.value.keys()) | |||||
| assert result_keys == sorted(result_keys) | assert result_keys == sorted(result_keys) | ||||
| def test_object_with_nested_structures(self, small_truncator): | def test_object_with_nested_structures(self, small_truncator): | ||||
| """Test object truncation with nested arrays and objects.""" | """Test object truncation with nested arrays and objects.""" | ||||
| nested_obj = {"simple": "value", "array": [1, 2, 3, 4, 5], "nested": {"inner": "data", "more": ["a", "b", "c"]}} | nested_obj = {"simple": "value", "array": [1, 2, 3, 4, 5], "nested": {"inner": "data", "more": ["a", "b", "c"]}} | ||||
| result, was_truncated = small_truncator._truncate_object(nested_obj, 60) | |||||
| result = small_truncator._truncate_object(nested_obj, 60) | |||||
| assert isinstance(result, dict) | |||||
| # Should handle nested structures appropriately | |||||
| assert isinstance(result.value, dict) | |||||
| class TestSegmentBasedTruncation: | class TestSegmentBasedTruncation: | ||||
| assert len(result.result.value) <= 1000 # Much smaller than original | assert len(result.result.value) <= 1000 # Much smaller than original | ||||
| class TestTruncationHelperMethods: | |||||
| """Test helper methods used in truncation.""" | |||||
| @pytest.fixture | |||||
| def truncator(self): | |||||
| return VariableTruncator() | |||||
| def test_truncate_item_to_budget_string(self, truncator): | |||||
| """Test _truncate_item_to_budget with string input.""" | |||||
| item = "this is a long string" | |||||
| budget = 15 | |||||
| result, was_truncated = truncator._truncate_item_to_budget(item, budget) | |||||
| assert isinstance(result, str) | |||||
| # Should be truncated to fit budget | |||||
| if was_truncated: | |||||
| assert len(result) <= budget | |||||
| assert result.endswith("...") | |||||
| def test_truncate_item_to_budget_dict(self, truncator): | |||||
| """Test _truncate_item_to_budget with dict input.""" | |||||
| item = {"key": "value", "longer": "longer value"} | |||||
| budget = 30 | |||||
| result, was_truncated = truncator._truncate_item_to_budget(item, budget) | |||||
| assert isinstance(result, dict) | |||||
| # Should apply object truncation logic | |||||
| def test_truncate_item_to_budget_list(self, truncator): | |||||
| """Test _truncate_item_to_budget with list input.""" | |||||
| item = [1, 2, 3, 4, 5] | |||||
| budget = 15 | |||||
| result, was_truncated = truncator._truncate_item_to_budget(item, budget) | |||||
| assert isinstance(result, list) | |||||
| # Should apply array truncation logic | |||||
| def test_truncate_item_to_budget_other_types(self, truncator): | |||||
| """Test _truncate_item_to_budget with other types.""" | |||||
| # Small number that fits | |||||
| result, was_truncated = truncator._truncate_item_to_budget(123, 10) | |||||
| assert result == 123 | |||||
| assert was_truncated is False | |||||
| # Large number that might not fit - should convert to string if needed | |||||
| large_num = 123456789012345 | |||||
| result, was_truncated = truncator._truncate_item_to_budget(large_num, 5) | |||||
| if was_truncated: | |||||
| assert isinstance(result, str) | |||||
| def test_truncate_value_to_budget_string(self, truncator): | |||||
| """Test _truncate_value_to_budget with string input.""" | |||||
| value = "x" * 100 | |||||
| budget = 20 | |||||
| result, was_truncated = truncator._truncate_value_to_budget(value, budget) | |||||
| assert isinstance(result, str) | |||||
| if was_truncated: | |||||
| assert len(result) <= 20 # Should respect budget | |||||
| assert result.endswith("...") | |||||
| def test_truncate_value_to_budget_respects_object_char_limit(self, truncator): | |||||
| """Test that _truncate_value_to_budget respects OBJECT_CHAR_LIMIT.""" | |||||
| # Even with large budget, should respect OBJECT_CHAR_LIMIT | |||||
| large_string = "x" * 10000 | |||||
| large_budget = 20000 | |||||
| result, was_truncated = truncator._truncate_value_to_budget(large_string, large_budget) | |||||
| if was_truncated: | |||||
| assert len(result) <= OBJECT_CHAR_LIMIT + 3 # +3 for "..." | |||||
| class TestEdgeCases: | class TestEdgeCases: | ||||
| """Test edge cases and error conditions.""" | """Test edge cases and error conditions.""" | ||||
| if isinstance(result.result, ObjectSegment): | if isinstance(result.result, ObjectSegment): | ||||
| result_size = truncator.calculate_json_size(result.result.value) | result_size = truncator.calculate_json_size(result.result.value) | ||||
| assert result_size <= original_size | assert result_size <= original_size | ||||
| class TestConstantsAndConfiguration: | |||||
| """Test behavior with different configuration constants.""" | |||||
| def test_large_variable_threshold_constant(self): | |||||
| """Test that LARGE_VARIABLE_THRESHOLD constant is properly used.""" | |||||
| truncator = VariableTruncator() | |||||
| assert truncator._max_size_bytes == LARGE_VARIABLE_THRESHOLD | |||||
| assert LARGE_VARIABLE_THRESHOLD == 10 * 1024 # 10KB | |||||
| def test_string_truncation_limit_constant(self): | |||||
| """Test that STRING_TRUNCATION_LIMIT constant is properly used.""" | |||||
| truncator = VariableTruncator() | |||||
| assert truncator._string_length_limit == 5000 | |||||
| def test_array_char_limit_constant(self): | |||||
| """Test that ARRAY_CHAR_LIMIT is used in array item truncation.""" | |||||
| truncator = VariableTruncator() | |||||
| # Test that ARRAY_CHAR_LIMIT is respected in array item truncation | |||||
| long_string = "x" * 2000 | |||||
| budget = 5000 # Large budget | |||||
| result, was_truncated = truncator._truncate_item_to_budget(long_string, budget) | |||||
| if was_truncated: | |||||
| # Should not exceed ARRAY_CHAR_LIMIT even with large budget | |||||
| assert len(result) <= ARRAY_CHAR_LIMIT + 3 # +3 for "..." | |||||
| def test_object_char_limit_constant(self): | |||||
| """Test that OBJECT_CHAR_LIMIT is used in object value truncation.""" | |||||
| truncator = VariableTruncator() | |||||
| # Test that OBJECT_CHAR_LIMIT is respected in object value truncation | |||||
| long_string = "x" * 8000 | |||||
| large_budget = 20000 | |||||
| result, was_truncated = truncator._truncate_value_to_budget(long_string, large_budget) | |||||
| if was_truncated: | |||||
| # Should not exceed OBJECT_CHAR_LIMIT even with large budget | |||||
| assert len(result) <= OBJECT_CHAR_LIMIT + 3 # +3 for "..." |
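Reviewer note: taken together, the rewritten assertions in this file suggest the private _truncate_string/_truncate_array/_truncate_object helpers now return a small result object instead of a (value, was_truncated) tuple, with sizes measured by VariableTruncator.calculate_json_size, and the string helper now receives its length limit as an argument (self.LENGTH_LIMIT above). A rough sketch of the result shape the tests read, inferred only from the attributes they access; the real type in services.variable_truncator may differ:

    from dataclasses import dataclass
    from typing import Any

    @dataclass
    class TruncationOutcome:  # hypothetical name; the tests only read value, truncated, value_size
        value: Any            # possibly truncated value
        truncated: bool       # whether any truncation was applied
        value_size: int = 0   # JSON-encoded size of value, per calculate_json_size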
| """Simplified unit tests for DraftVarLoader focusing on core functionality.""" | """Simplified unit tests for DraftVarLoader focusing on core functionality.""" | ||||
| import json | import json | ||||
| from unittest.mock import Mock, patch | from unittest.mock import Mock, patch | ||||
| def draft_var_loader(self, mock_engine): | def draft_var_loader(self, mock_engine): | ||||
| """Create DraftVarLoader instance for testing.""" | """Create DraftVarLoader instance for testing.""" | ||||
| return DraftVarLoader( | return DraftVarLoader( | ||||
| engine=mock_engine, | |||||
| app_id="test-app-id", | |||||
| tenant_id="test-tenant-id", | |||||
| fallback_variables=[] | |||||
| engine=mock_engine, app_id="test-app-id", tenant_id="test-tenant-id", fallback_variables=[] | |||||
| ) | ) | ||||
| def test_load_offloaded_variable_string_type_unit(self, draft_var_loader): | def test_load_offloaded_variable_string_type_unit(self, draft_var_loader): | ||||
| assert variable.name == "test_variable" | assert variable.name == "test_variable" | ||||
| assert variable.description == "test description" | assert variable.description == "test description" | ||||
| assert variable.value == test_content | assert variable.value == test_content | ||||
| # Verify storage was called correctly | # Verify storage was called correctly | ||||
| mock_storage.load.assert_called_once_with("storage/key/test.txt") | mock_storage.load.assert_called_once_with("storage/key/test.txt") | ||||
| assert variable.name == "test_object" | assert variable.name == "test_object" | ||||
| assert variable.description == "test description" | assert variable.description == "test description" | ||||
| assert variable.value == test_object | assert variable.value == test_object | ||||
| # Verify method calls | # Verify method calls | ||||
| mock_storage.load.assert_called_once_with("storage/key/test.json") | mock_storage.load.assert_called_once_with("storage/key/test.json") | ||||
| mock_build_segment.assert_called_once_with(SegmentType.OBJECT, test_object) | mock_build_segment.assert_called_once_with(SegmentType.OBJECT, test_object) | ||||
| with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: | with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: | ||||
| from core.variables.segments import FloatSegment | from core.variables.segments import FloatSegment | ||||
| mock_segment = FloatSegment(value=test_number) | mock_segment = FloatSegment(value=test_number) | ||||
| mock_build_segment.return_value = mock_segment | mock_build_segment.return_value = mock_segment | ||||
| assert variable.id == "draft-var-id" | assert variable.id == "draft-var-id" | ||||
| assert variable.name == "test_number" | assert variable.name == "test_number" | ||||
| assert variable.description == "test number description" | assert variable.description == "test number description" | ||||
| # Verify method calls | # Verify method calls | ||||
| mock_storage.load.assert_called_once_with("storage/key/test_number.json") | mock_storage.load.assert_called_once_with("storage/key/test_number.json") | ||||
| mock_build_segment.assert_called_once_with(SegmentType.NUMBER, test_number) | mock_build_segment.assert_called_once_with(SegmentType.NUMBER, test_number) | ||||
| draft_var = Mock(spec=WorkflowDraftVariable) | draft_var = Mock(spec=WorkflowDraftVariable) | ||||
| draft_var.id = "draft-var-id" | draft_var.id = "draft-var-id" | ||||
| draft_var.node_id = "test-node-id" | draft_var.node_id = "test-node-id" | ||||
| draft_var.name = "test_array" | draft_var.name = "test_array" | ||||
| draft_var.description = "test array description" | draft_var.description = "test array description" | ||||
| draft_var.get_selector.return_value = ["test-node-id", "test_array"] | draft_var.get_selector.return_value = ["test-node-id", "test_array"] | ||||
| with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: | with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: | ||||
| from core.variables.segments import ArrayAnySegment | from core.variables.segments import ArrayAnySegment | ||||
| mock_segment = ArrayAnySegment(value=test_array) | mock_segment = ArrayAnySegment(value=test_array) | ||||
| mock_build_segment.return_value = mock_segment | mock_build_segment.return_value = mock_segment | ||||
| assert variable.id == "draft-var-id" | assert variable.id == "draft-var-id" | ||||
| assert variable.name == "test_array" | assert variable.name == "test_array" | ||||
| assert variable.description == "test array description" | assert variable.description == "test array description" | ||||
| # Verify method calls | # Verify method calls | ||||
| mock_storage.load.assert_called_once_with("storage/key/test_array.json") | mock_storage.load.assert_called_once_with("storage/key/test_array.json") | ||||
| mock_build_segment.assert_called_once_with(SegmentType.ARRAY_ANY, test_array) | mock_build_segment.assert_called_once_with(SegmentType.ARRAY_ANY, test_array) | ||||
| def test_load_variables_with_offloaded_variables_unit(self, draft_var_loader): | def test_load_variables_with_offloaded_variables_unit(self, draft_var_loader): | ||||
| """Test load_variables method with mix of regular and offloaded variables.""" | """Test load_variables method with mix of regular and offloaded variables.""" | ||||
| selectors = [ | |||||
| ["node1", "regular_var"], | |||||
| ["node2", "offloaded_var"] | |||||
| ] | |||||
| selectors = [["node1", "regular_var"], ["node2", "offloaded_var"]] | |||||
| # Mock regular variable | # Mock regular variable | ||||
| regular_draft_var = Mock(spec=WorkflowDraftVariable) | regular_draft_var = Mock(spec=WorkflowDraftVariable) | ||||
| regular_draft_var.is_truncated.return_value = False | regular_draft_var.is_truncated.return_value = False | ||||
| regular_draft_var.node_id = "node1" | regular_draft_var.node_id = "node1" | ||||
| regular_draft_var.name = "regular_var" | regular_draft_var.name = "regular_var" | ||||
| regular_draft_var.get_value.return_value = StringSegment(value="regular_value") | regular_draft_var.get_value.return_value = StringSegment(value="regular_value") | ||||
| regular_draft_var.get_selector.return_value = ["node1", "regular_var"] | regular_draft_var.get_selector.return_value = ["node1", "regular_var"] | ||||
| regular_draft_var.id = "regular-var-id" | regular_draft_var.id = "regular-var-id" | ||||
| # Mock offloaded variable | # Mock offloaded variable | ||||
| upload_file = Mock(spec=UploadFile) | upload_file = Mock(spec=UploadFile) | ||||
| upload_file.key = "storage/key/offloaded.txt" | upload_file.key = "storage/key/offloaded.txt" | ||||
| variable_file = Mock(spec=WorkflowDraftVariableFile) | variable_file = Mock(spec=WorkflowDraftVariableFile) | ||||
| variable_file.value_type = SegmentType.STRING | variable_file.value_type = SegmentType.STRING | ||||
| variable_file.upload_file = upload_file | variable_file.upload_file = upload_file | ||||
| with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: | with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: | ||||
| mock_session = Mock() | mock_session = Mock() | ||||
| mock_session_cls.return_value.__enter__.return_value = mock_session | mock_session_cls.return_value.__enter__.return_value = mock_session | ||||
| mock_service = Mock() | mock_service = Mock() | ||||
| mock_service.get_draft_variables_by_selectors.return_value = draft_vars | mock_service.get_draft_variables_by_selectors.return_value = draft_vars | ||||
| with patch("services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service): | |||||
| with patch( | |||||
| "services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service | |||||
| ): | |||||
| with patch("services.workflow_draft_variable_service.StorageKeyLoader"): | with patch("services.workflow_draft_variable_service.StorageKeyLoader"): | ||||
| with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: | with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: | ||||
| # Mock regular variable creation | # Mock regular variable creation | ||||
| regular_variable = Mock() | regular_variable = Mock() | ||||
| regular_variable.selector = ["node1", "regular_var"] | regular_variable.selector = ["node1", "regular_var"] | ||||
| # Mock offloaded variable creation | # Mock offloaded variable creation | ||||
| offloaded_variable = Mock() | offloaded_variable = Mock() | ||||
| offloaded_variable.selector = ["node2", "offloaded_var"] | offloaded_variable.selector = ["node2", "offloaded_var"] | ||||
| mock_segment_to_variable.return_value = regular_variable | mock_segment_to_variable.return_value = regular_variable | ||||
| with patch("services.workflow_draft_variable_service.storage") as mock_storage: | with patch("services.workflow_draft_variable_service.storage") as mock_storage: | ||||
| mock_storage.load.return_value = b"offloaded_content" | mock_storage.load.return_value = b"offloaded_content" | ||||
| with patch.object(draft_var_loader, "_load_offloaded_variable") as mock_load_offloaded: | with patch.object(draft_var_loader, "_load_offloaded_variable") as mock_load_offloaded: | ||||
| mock_load_offloaded.return_value = (("node2", "offloaded_var"), offloaded_variable) | mock_load_offloaded.return_value = (("node2", "offloaded_var"), offloaded_variable) | ||||
| with patch("concurrent.futures.ThreadPoolExecutor") as mock_executor_cls: | with patch("concurrent.futures.ThreadPoolExecutor") as mock_executor_cls: | ||||
| mock_executor = Mock() | mock_executor = Mock() | ||||
| mock_executor_cls.return_value.__enter__.return_value = mock_executor | mock_executor_cls.return_value.__enter__.return_value = mock_executor | ||||
| # Verify results | # Verify results | ||||
| assert len(result) == 2 | assert len(result) == 2 | ||||
| # Verify service method was called | # Verify service method was called | ||||
| mock_service.get_draft_variables_by_selectors.assert_called_once_with( | mock_service.get_draft_variables_by_selectors.assert_called_once_with( | ||||
| draft_var_loader._app_id, selectors | draft_var_loader._app_id, selectors | ||||
| ) | ) | ||||
| # Verify offloaded variable loading was called | # Verify offloaded variable loading was called | ||||
| mock_load_offloaded.assert_called_once_with(offloaded_draft_var) | mock_load_offloaded.assert_called_once_with(offloaded_draft_var) | ||||
| def test_load_variables_all_offloaded_variables_unit(self, draft_var_loader): | def test_load_variables_all_offloaded_variables_unit(self, draft_var_loader): | ||||
| """Test load_variables method with only offloaded variables.""" | """Test load_variables method with only offloaded variables.""" | ||||
| selectors = [ | |||||
| ["node1", "offloaded_var1"], | |||||
| ["node2", "offloaded_var2"] | |||||
| ] | |||||
| selectors = [["node1", "offloaded_var1"], ["node2", "offloaded_var2"]] | |||||
| # Mock first offloaded variable | # Mock first offloaded variable | ||||
| offloaded_var1 = Mock(spec=WorkflowDraftVariable) | offloaded_var1 = Mock(spec=WorkflowDraftVariable) | ||||
| with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: | with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: | ||||
| mock_session = Mock() | mock_session = Mock() | ||||
| mock_session_cls.return_value.__enter__.return_value = mock_session | mock_session_cls.return_value.__enter__.return_value = mock_session | ||||
| mock_service = Mock() | mock_service = Mock() | ||||
| mock_service.get_draft_variables_by_selectors.return_value = draft_vars | mock_service.get_draft_variables_by_selectors.return_value = draft_vars | ||||
| with patch("services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service): | |||||
| with patch( | |||||
| "services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service | |||||
| ): | |||||
| with patch("services.workflow_draft_variable_service.StorageKeyLoader"): | with patch("services.workflow_draft_variable_service.StorageKeyLoader"): | ||||
| with patch("services.workflow_draft_variable_service.ThreadPoolExecutor") as mock_executor_cls: | with patch("services.workflow_draft_variable_service.ThreadPoolExecutor") as mock_executor_cls: | ||||
| mock_executor = Mock() | mock_executor = Mock() | ||||
| mock_executor_cls.return_value.__enter__.return_value = mock_executor | mock_executor_cls.return_value.__enter__.return_value = mock_executor | ||||
| mock_executor.map.return_value = [ | mock_executor.map.return_value = [ | ||||
| (("node1", "offloaded_var1"), Mock()), | (("node1", "offloaded_var1"), Mock()), | ||||
| (("node2", "offloaded_var2"), Mock()) | |||||
| (("node2", "offloaded_var2"), Mock()), | |||||
| ] | ] | ||||
| # Execute the method | # Execute the method | ||||
| # Verify results - since we have only offloaded variables, should have 2 results | # Verify results - since we have only offloaded variables, should have 2 results | ||||
| assert len(result) == 2 | assert len(result) == 2 | ||||
| # Verify ThreadPoolExecutor was used | # Verify ThreadPoolExecutor was used | ||||
| mock_executor_cls.assert_called_once_with(max_workers=10) | mock_executor_cls.assert_called_once_with(max_workers=10) | ||||
| mock_executor.map.assert_called_once() | mock_executor.map.assert_called_once() | ||||
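Reviewer note: a condensed sketch of the flow these DraftVarLoader tests exercise, namely that offloaded draft variables are read back from object storage via the upload file's key, rebuilt into typed segments, and loaded concurrently with ThreadPoolExecutor(max_workers=10). Names come from the mocks above; the signature is simplified (the production _load_offloaded_variable takes only the draft variable and resolves its WorkflowDraftVariableFile itself), so treat this as an illustration under those assumptions:

    import json
    from concurrent.futures import ThreadPoolExecutor

    from core.variables.types import SegmentType

    def load_offloaded(draft_var, variable_file, storage, build_segment_with_type):
        # storage.load returns the raw bytes written when the variable was offloaded.
        raw = storage.load(variable_file.upload_file.key)
        # Strings are stored as plain text, structured values as JSON (inferred from the mocks above).
        value = raw.decode() if variable_file.value_type == SegmentType.STRING else json.loads(raw)
        segment = build_segment_with_type(variable_file.value_type, value)
        return tuple(draft_var.get_selector()), segment

    def load_all(pairs, storage, build_segment_with_type):
        # Each pair is (draft_var, variable_file); max_workers=10 matches the test's assertion.
        with ThreadPoolExecutor(max_workers=10) as executor:
            return dict(executor.map(lambda p: load_offloaded(p[0], p[1], storage, build_segment_with_type), pairs))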
| from core.variables.types import SegmentType | from core.variables.types import SegmentType | ||||
| from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID | from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID | ||||
| from core.workflow.enums import NodeType | from core.workflow.enums import NodeType | ||||
| from libs.uuid_utils import uuidv7 | |||||
| from models.account import Account | from models.account import Account | ||||
| from models.enums import DraftVariableType | from models.enums import DraftVariableType | ||||
| from models.workflow import ( | from models.workflow import ( | ||||
| created_by="test_user_id", | created_by="test_user_id", | ||||
| environment_variables=[], | environment_variables=[], | ||||
| conversation_variables=[], | conversation_variables=[], | ||||
| rag_pipeline_variables=[], | |||||
| ) | ) | ||||
| def test_reset_conversation_variable(self, mock_session): | def test_reset_conversation_variable(self, mock_session): |