Browse source code

fix(tests): fix broken tests and linter issues

tags/2.0.0-beta.1
QuantumGhost 2 months ago
parent
commit
e2ae89e08d
25 changed files with 642 additions and 850 deletions
  1. +2 -5      api/commands.py
  2. +1 -2      api/core/app/apps/common/workflow_response_converter.py
  3. +20 -3     api/core/app/entities/app_invoke_entities.py
  4. +8 -3      api/core/datasource/__base/datasource_runtime.py
  5. +2 -1      api/core/file/models.py
  6. +1 -4      api/core/rag/index_processor/processor/paragraph_index_processor.py
  7. +0 -1      api/core/rag/index_processor/processor/parent_child_index_processor.py
  8. +1 -1      api/core/schemas/registry.py
  9. +4 -1      api/core/workflow/nodes/base/node.py
  10. +3 -3     api/migrations/versions/2025_08_11_1138-17d4db47800c_add_pipeline_info_16.py
  11. +3 -1     api/services/datasource_provider_service.py
  12. +216 -145 api/services/variable_truncator.py
  13. +2 -1     api/services/workflow_draft_variable_service.py
  14. +3 -3     api/tasks/remove_app_and_related_data_task.py
  15. +0 -213   api/tests/integration_tests/test_offload.py
  16. +77 -90   api/tests/integration_tests/workflow/test_process_data_truncation_integration.py
  17. +30 -29   api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py
  18. +1 -1     api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py
  19. +12 -10   api/tests/unit_tests/core/schemas/test_resolver.py
  20. +2 -2     api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py
  21. +66 -35   api/tests/unit_tests/models/test_workflow_node_execution_offload.py
  22. +102 -91  api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py
  23. +51 -170  api/tests/unit_tests/services/test_variable_truncator.py
  24. +33 -35   api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py
  25. +2 -0     api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py

+ 2
- 5
api/commands.py View File

result = conn.execute(sa.text(variables_query))
orphaned_by_app = {}
total_files = 0
for row in result:
app_id, variable_count, file_count = row
orphaned_by_app[app_id] = {
"variables": variable_count,
"files": file_count
}
orphaned_by_app[app_id] = {"variables": variable_count, "files": file_count}
total_files += file_count

total_orphaned = sum(app_data["variables"] for app_data in orphaned_by_app.values())

+ 1
- 2
api/core/app/apps/common/workflow_response_converter.py View File

from core.tools.tool_manager import ToolManager
from core.variables.segments import ArrayFileSegment, FileSegment, Segment
from core.workflow.entities import WorkflowExecution, WorkflowNodeExecution
from core.workflow.enums import WorkflowNodeExecutionStatus
from core.workflow.nodes import NodeType
from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
from libs.datetime_utils import naive_utc_now
from models import (

+ 20
- 3
api/core/app/entities/app_invoke_entities.py View File

from collections.abc import Mapping, Sequence
from enum import StrEnum
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional

from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator

if TYPE_CHECKING:
from core.ops.ops_trace_manager import TraceQueueManager

from constants import UUID_NIL
from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAppConfig
from core.entities.provider_configuration import ProviderModelBundle
extras: dict[str, Any] = Field(default_factory=dict)

# tracing instance
# Using Any to avoid circular import with TraceQueueManager
trace_manager: Optional[Any] = None
trace_manager: Optional["TraceQueueManager"] = None


class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
inputs: dict

single_loop_run: Optional[SingleLoopRunEntity] = None


# Import TraceQueueManager at runtime to resolve forward references
from core.ops.ops_trace_manager import TraceQueueManager

# Rebuild models that use forward references
AppGenerateEntity.model_rebuild()
EasyUIBasedAppGenerateEntity.model_rebuild()
ConversationAppGenerateEntity.model_rebuild()
ChatAppGenerateEntity.model_rebuild()
CompletionAppGenerateEntity.model_rebuild()
AgentChatAppGenerateEntity.model_rebuild()
AdvancedChatAppGenerateEntity.model_rebuild()
WorkflowAppGenerateEntity.model_rebuild()
RagPipelineGenerateEntity.model_rebuild()
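The hunk above swaps the `Optional[Any]` workaround for a quoted forward reference plus explicit `model_rebuild()` calls once `TraceQueueManager` is imported. A minimal, self-contained sketch of that pattern, with placeholder class names rather than the actual Dify entities:

from typing import Optional

from pydantic import BaseModel, ConfigDict


class GenerateEntity(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Kept as an unresolved forward reference until model_rebuild() runs.
    trace_manager: Optional["TraceQueueManager"] = None


class TraceQueueManager:
    """Stand-in for the class that would otherwise cause a circular import."""


# Now that the name exists in this module's namespace, resolve the reference.
GenerateEntity.model_rebuild()

entity = GenerateEntity(trace_manager=TraceQueueManager())
assert isinstance(entity.trace_manager, TraceQueueManager)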

+ 8
- 3
api/core/datasource/__base/datasource_runtime.py View File

from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional

from openai import BaseModel
from pydantic import Field

from core.app.entities.app_invoke_entities import InvokeFrom
from core.datasource.entities.datasource_entities import DatasourceInvokeFrom

if TYPE_CHECKING:
from core.app.entities.app_invoke_entities import InvokeFrom


class DatasourceRuntime(BaseModel):
"""

tenant_id: str
datasource_id: Optional[str] = None
invoke_from: Optional[InvokeFrom] = None
invoke_from: Optional["InvokeFrom"] = None
datasource_invoke_from: Optional[DatasourceInvokeFrom] = None
credentials: dict[str, Any] = Field(default_factory=dict)
runtime_parameters: dict[str, Any] = Field(default_factory=dict)
"""

def __init__(self):
# Import InvokeFrom locally to avoid circular import
from core.app.entities.app_invoke_entities import InvokeFrom

super().__init__(
tenant_id="fake_tenant_id",
datasource_id="fake_datasource_id",

+ 2
- 1
api/core/file/models.py View File

if self.related_id is None:
raise ValueError("Missing file related_id")
return helpers.get_signed_file_url(upload_file_id=self.related_id)
elif self.transfer_method == FileTransferMethod.TOOL_FILE or self.transfer_method == FileTransferMethod.DATASOURCE_FILE:
elif self.transfer_method in [FileTransferMethod.TOOL_FILE, FileTransferMethod.DATASOURCE_FILE]:
assert self.related_id is not None
assert self.extension is not None
return sign_tool_file(tool_file_id=self.related_id, extension=self.extension)

def to_plugin_parameter(self) -> dict[str, Any]:
return {
"dify_model_identity": FILE_MODEL_IDENTITY,

+ 1
- 4
api/core/rag/index_processor/processor/paragraph_index_processor.py View File

preview = []
for content in chunks:
preview.append({"content": content})
return {"chunk_structure": IndexType.PARAGRAPH_INDEX,
"preview": preview,
"total_segments": len(chunks)
}
return {"chunk_structure": IndexType.PARAGRAPH_INDEX, "preview": preview, "total_segments": len(chunks)}
else:
raise ValueError("Chunks is not a list")

+ 0
- 1
api/core/rag/index_processor/processor/parent_child_index_processor.py View File

vector.create(all_child_documents)

def format_preview(self, chunks: Any) -> Mapping[str, Any]:

parent_childs = ParentChildStructureChunk(**chunks)
preview = []
for parent_child in parent_childs.parent_child_chunks:

+ 1
- 1
api/core/schemas/registry.py View File

"""Returns all schemas for a version in the API format""" """Returns all schemas for a version in the API format"""
version_schemas = self.versions.get(version, {}) version_schemas = self.versions.get(version, {})


result = []
result: list[Mapping[str, Any]] = []
for schema_name, schema in version_schemas.items(): for schema_name, schema in version_schemas.items():
result.append({"name": schema_name, "label": schema.get("title", schema_name), "schema": schema}) result.append({"name": schema_name, "label": schema.get("title", schema_name), "schema": schema})



+ 4
- 1
api/core/workflow/nodes/base/node.py View File

from core.workflow.nodes.datasource.datasource_node import DatasourceNode

if isinstance(self, DatasourceNode):
start_event.provider_id = f"{getattr(self.get_base_node_data(), 'plugin_id', '')}/{getattr(self.get_base_node_data(), 'provider_name', '')}"
plugin_id = getattr(self.get_base_node_data(), "plugin_id", "")
provider_name = getattr(self.get_base_node_data(), "provider_name", "")

start_event.provider_id = f"{plugin_id}/{provider_name}"
start_event.provider_type = getattr(self.get_base_node_data(), "provider_type", "")

from typing import cast

+ 3
- 3
api/migrations/versions/2025_08_11_1138-17d4db47800c_add_pipeline_info_16.py View File

"""datasource_oauth_refresh """datasource_oauth_refresh


Revision ID: 17d4db47800c Revision ID: 17d4db47800c
Revises: 223c3f882c69
Revises: 74e5f667f4b7
Create Date: 2025-08-11 11:38:03.662874 Create Date: 2025-08-11 11:38:03.662874


""" """


# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision = '17d4db47800c' revision = '17d4db47800c'
down_revision = '223c3f882c69'
down_revision = '74e5f667f4b7'
branch_labels = None branch_labels = None
depends_on = None depends_on = None


# ### commands auto generated by Alembic - please adjust! ### # ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('datasource_providers', schema=None) as batch_op: with op.batch_alter_table('datasource_providers', schema=None) as batch_op:
batch_op.add_column(sa.Column('expires_at', sa.Integer(), nullable=False, server_default='-1')) batch_op.add_column(sa.Column('expires_at', sa.Integer(), nullable=False, server_default='-1'))
# ### end Alembic commands ### # ### end Alembic commands ###
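The only change to this migration is re-parenting it: `down_revision` moves from `223c3f882c69` to `74e5f667f4b7` so the revision sits on the current head of the chain. For reference, a skeleton Alembic revision showing the fields involved; the identifiers are placeholders and the downgrade body is an assumed, conventional counterpart to the add_column above:

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "aaaaaaaaaaaa"        # placeholder
down_revision = "bbbbbbbbbbbb"   # must point at the current head; re-point it when rebasing
branch_labels = None
depends_on = None


def upgrade():
    with op.batch_alter_table("datasource_providers", schema=None) as batch_op:
        batch_op.add_column(sa.Column("expires_at", sa.Integer(), nullable=False, server_default="-1"))


def downgrade():
    with op.batch_alter_table("datasource_providers", schema=None) as batch_op:
        batch_op.drop_column("expires_at")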





+ 3
- 1
api/services/datasource_provider_service.py View File

credentials = self.list_datasource_credentials(
tenant_id=tenant_id, provider=datasource.provider, plugin_id=datasource.plugin_id
)
redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{datasource_provider_id}/datasource/callback"
redirect_uri = "{}/console/api/oauth/plugin/{}/datasource/callback".format(
dify_config.CONSOLE_API_URL, datasource_provider_id
)
datasource_credentials.append(
{
"provider": datasource.provider,

+ 216
- 145
api/services/variable_truncator.py View File

import dataclasses
import json
from collections.abc import Mapping
from typing import Any, TypeAlias
from enum import StrEnum
from typing import Any, Generic, TypeAlias, TypedDict, TypeVar, overload

from configs import dify_config
from core.file.models import File
from core.variables.segments import (
ArrayFileSegment,
ArraySegment,
Segment,
StringSegment,
)
from core.variables.utils import dumps_with_segments


LARGE_VARIABLE_THRESHOLD = 10 * 1024 # 100KB in bytes
OBJECT_CHAR_LIMIT = 5000
ARRAY_CHAR_LIMIT = 1000
_MAX_DEPTH = 100


_MAX_DEPTH = 20

class _QAKeys:
"""dict keys for _QAStructure"""

QA_CHUNKS = "qa_chunks"
QUESTION = "question"
ANSWER = "answer"


class _PCKeys:
"""dict keys for _ParentChildStructure"""

PARENT_MODE = "parent_mode"
PARENT_CHILD_CHUNKS = "parent_child_chunks"
PARENT_CONTENT = "parent_content"
CHILD_CONTENTS = "child_contents"


class _QAStructureItem(TypedDict):
question: str
answer: str


class _QAStructure(TypedDict):
qa_chunks: list[_QAStructureItem]


class _ParentChildChunkItem(TypedDict):
parent_content: str
child_contents: list[str]


class _ParentChildStructure(TypedDict):
parent_mode: str
parent_child_chunks: list[_ParentChildChunkItem]


class _SpecialChunkType(StrEnum):
parent_child = "parent_child"
qa = "qa"


_T = TypeVar("_T")


@dataclasses.dataclass(frozen=True)
class _PartResult(Generic[_T]):
value: _T
value_size: int
truncated: bool




class MaxDepthExceededError(Exception):
Uses recursive size calculation to avoid repeated JSON serialization.
"""

_JSON_SEPARATORS = (",", ":")

def __init__(
self,
string_length_limit=5000,
array_element_limit: int = 20,
max_size_bytes: int = LARGE_VARIABLE_THRESHOLD,
max_size_bytes: int = 1024_000, # 100KB
):
if string_length_limit <= 3:
raise ValueError("string_length_limit should be greater than 3.")
of a WorkflowNodeExecution record. This ensures the mappings remain within the
specified size limits while preserving their structure.
"""
size = self.calculate_json_size(v)
if size < self._max_size_bytes:
return v, False
budget = self._max_size_bytes
is_truncated = False
truncated_mapping: dict[str, Any] = {}
size = len(v.items())
remaining = size
length = len(v.items())
used_size = 0
for key, value in v.items():
budget -= self.calculate_json_size(key)
if budget < 0:
break
truncated_value, value_truncated = self._truncate_value_to_budget(value, budget // remaining)
if value_truncated:
is_truncated = True
truncated_mapping[key] = truncated_value
# TODO(QuantumGhost): This approach is inefficient. Ideally, the truncation function should directly
# report the size of the truncated value.
budget -= self.calculate_json_size(truncated_value) + 2 # ":" and ","
used_size += self.calculate_json_size(key)
if used_size > budget:
truncated_mapping[key] = "..."
continue
value_budget = (budget - used_size) // (length - len(truncated_mapping))
if isinstance(value, Segment):
part_result = self._truncate_segment(value, value_budget)
else:
part_result = self._truncate_json_primitives(value, value_budget)
is_truncated = is_truncated or part_result.truncated
truncated_mapping[key] = part_result.value
used_size += part_result.value_size
return truncated_mapping, is_truncated

@staticmethod
return True

def truncate(self, segment: Segment) -> TruncationResult:
if isinstance(segment, StringSegment):
result = self._truncate_segment(segment, self._string_length_limit)
else:
result = self._truncate_segment(segment, self._max_size_bytes)

if result.value_size > self._max_size_bytes:
if isinstance(result.value, str):
result = self._truncate_string(result.value, self._max_size_bytes)
return TruncationResult(StringSegment(value=result.value), True)

# Apply final fallback - convert to JSON string and truncate
json_str = dumps_with_segments(result.value, ensure_ascii=False)
if len(json_str) > self._max_size_bytes:
json_str = json_str[: self._max_size_bytes] + "..."
return TruncationResult(result=StringSegment(value=json_str), truncated=True)

return TruncationResult(
result=segment.model_copy(update={"value": result.value.value}), truncated=result.truncated
)

def _truncate_segment(self, segment: Segment, target_size: int) -> _PartResult[Segment]:
""" """
Apply smart truncation to a variable value. Apply smart truncation to a variable value.


""" """


if not VariableTruncator._segment_need_truncation(segment): if not VariableTruncator._segment_need_truncation(segment):
return TruncationResult(result=segment, truncated=False)
return _PartResult(segment, self.calculate_json_size(segment.value), False)


result: _PartResult[Any]
# Apply type-specific truncation with target size # Apply type-specific truncation with target size
if isinstance(segment, ArraySegment): if isinstance(segment, ArraySegment):
truncated_value, was_truncated = self._truncate_array(segment.value, self._max_size_bytes)
result = self._truncate_array(segment.value, target_size)
elif isinstance(segment, StringSegment): elif isinstance(segment, StringSegment):
truncated_value, was_truncated = self._truncate_string(segment.value)
result = self._truncate_string(segment.value, target_size)
elif isinstance(segment, ObjectSegment): elif isinstance(segment, ObjectSegment):
truncated_value, was_truncated = self._truncate_object(segment.value, self._max_size_bytes)
result = self._truncate_object(segment.value, target_size)
else: else:
raise AssertionError("this should be unreachable.") raise AssertionError("this should be unreachable.")


# Check if we still exceed the final character limit after type-specific truncation
if not was_truncated:
return TruncationResult(result=segment, truncated=False)

truncated_size = self.calculate_json_size(truncated_value)
if truncated_size > self._max_size_bytes:
if isinstance(truncated_value, str):
return TruncationResult(StringSegment(value=truncated_value[: self._max_size_bytes - 3]), True)
# Apply final fallback - convert to JSON string and truncate
json_str = json.dumps(truncated_value, ensure_ascii=False, separators=self._JSON_SEPARATORS)
if len(json_str) > self._max_size_bytes:
json_str = json_str[: self._max_size_bytes] + "..."
return TruncationResult(result=StringSegment(value=json_str), truncated=True)

return TruncationResult(result=segment.model_copy(update={"value": truncated_value}), truncated=True)
return _PartResult(
value=segment.model_copy(update={"value": result.value}),
value_size=result.value_size,
truncated=result.truncated,
)


@staticmethod
def calculate_json_size(value: Any, depth=0) -> int:
"""Recursively calculate JSON size without serialization."""
if isinstance(value, Segment):
return VariableTruncator.calculate_json_size(value.value)
if depth > _MAX_DEPTH:
raise MaxDepthExceededError()
if isinstance(value, str):
# For strings, we need to account for escaping and quotes
# Rough estimate: each character might need escaping, plus 2 for quotes
return len(value.encode("utf-8")) + 2
# Ideally, the size of strings should be calculated based on their utf-8 encoded length.
# However, this adds complexity as we would need to compute encoded sizes consistently
# throughout the code. Therefore, we approximate the size using the string's length.
# Rough estimate: number of characters, plus 2 for quotes
return len(value) + 2
elif isinstance(value, (int, float)):
return len(str(value))
elif isinstance(value, bool):
total += 1 # ":"
total += VariableTruncator.calculate_json_size(value[key], depth=depth + 1)
return total
elif isinstance(value, File):
return VariableTruncator.calculate_json_size(value.model_dump(), depth=depth + 1)
else:
raise UnknownTypeError(f"got unknown type {type(value)}")


def _truncate_string(self, value: str) -> tuple[str, bool]:
"""Truncate string values."""
if len(value) <= self._string_length_limit:
return value, False
return value[: self._string_length_limit - 3] + "...", True
def _truncate_string(self, value: str, target_size: int) -> _PartResult[str]:
if (size := self.calculate_json_size(value)) < target_size:
return _PartResult(value, size, False)
if target_size < 5:
return _PartResult("...", 5, True)
truncated_size = min(self._string_length_limit, target_size - 5)
truncated_value = value[:truncated_size] + "..."
return _PartResult(truncated_value, self.calculate_json_size(truncated_value), True)


def _truncate_array(self, value: list, target_size: int) -> tuple[list, bool]:
def _truncate_array(self, value: list, target_size: int) -> _PartResult[list]:
""" """
Truncate array with correct strategy: Truncate array with correct strategy:
1. First limit to 20 items 1. First limit to 20 items
2. If still too large, truncate individual items 2. If still too large, truncate individual items
""" """


# Step 1: Limit to first 20 items
limited_items = value[: self._array_element_limit]
was_truncated = len(limited_items) < len(value)
truncated_value: list[Any] = []
truncated = False
used_size = self.calculate_json_size([])


# Step 2: Check if we still exceed the target size
current_size = self.calculate_json_size(limited_items)
if current_size <= target_size:
return limited_items, was_truncated
target_length = self._array_element_limit


# Step 3: Truncate individual items to fit within target size
truncated_items = []
remaining_size = target_size - 2 # Account for []

for i, item in enumerate(limited_items):
for i, item in enumerate(value):
if i >= target_length:
return _PartResult(truncated_value, used_size, True)
if i > 0:
remaining_size -= 1 # Account for comma
used_size += 1 # Account for comma

if remaining_size <= 0:
if used_size > target_size:
break


# Calculate how much space this item can use
remaining_items = len(limited_items) - i
item_budget = remaining_size // remaining_items

# Truncate the item to fit within budget
truncated_item, item_truncated = self._truncate_item_to_budget(item, item_budget)
truncated_items.append(truncated_item)
part_result = self._truncate_json_primitives(item, target_size - used_size)
truncated_value.append(part_result.value)
used_size += part_result.value_size
truncated = part_result.truncated
return _PartResult(truncated_value, used_size, truncated)


# Update remaining size
item_size = self.calculate_json_size(truncated_item)
remaining_size -= item_size
@classmethod
def _maybe_qa_structure(cls, m: Mapping[str, Any]) -> bool:
qa_chunks = m.get(_QAKeys.QA_CHUNKS)
if qa_chunks is None:
return False
if not isinstance(qa_chunks, list):
return False
return True


if item_truncated:
was_truncated = True
@classmethod
def _maybe_parent_child_structure(cls, m: Mapping[str, Any]) -> bool:
parent_mode = m.get(_PCKeys.PARENT_MODE)
if parent_mode is None:
return False
if not isinstance(parent_mode, str):
return False
parent_child_chunks = m.get(_PCKeys.PARENT_CHILD_CHUNKS)
if parent_child_chunks is None:
return False
if not isinstance(parent_child_chunks, list):
return False


return truncated_items, True
return True


def _truncate_object(self, value: Mapping[str, Any], target_size: int) -> tuple[Mapping[str, Any], bool]:
def _truncate_object(self, mapping: Mapping[str, Any], target_size: int) -> _PartResult[Mapping[str, Any]]:
""" """
Truncate object with key preservation priority. Truncate object with key preservation priority.


1. Keep all keys, truncate values to fit within budget 1. Keep all keys, truncate values to fit within budget
2. If still too large, drop keys starting from the end 2. If still too large, drop keys starting from the end
""" """
if not value:
return value, False
if not mapping:
return _PartResult(mapping, self.calculate_json_size(mapping), False)


truncated_obj = {}
was_truncated = False
remaining_size = target_size - 2 # Account for {}
truncated = False
used_size = self.calculate_json_size({})

# Sort keys to ensure deterministic behavior
sorted_keys = sorted(value.keys())
sorted_keys = sorted(mapping.keys())

for i, key in enumerate(sorted_keys):
val = value[key]

if i > 0:
remaining_size -= 1 # Account for comma

if remaining_size <= 0:
if used_size > target_size:
# No more room for additional key-value pairs
was_truncated = True
truncated = True
break


pair_size = 0

if i > 0:
pair_size += 1 # Account for comma

# Calculate budget for this key-value pair
key_size = self.calculate_json_size(str(key)) + 1 # +1 for ":"
# do not try to truncate keys, as we want to keep the structure of
# object.
key_size = self.calculate_json_size(key) + 1 # +1 for ":"
pair_size += key_size
remaining_pairs = len(sorted_keys) - i
value_budget = max(0, (remaining_size - key_size) // remaining_pairs)
value_budget = max(0, (target_size - pair_size - used_size) // remaining_pairs)

if value_budget <= 0:
was_truncated = True
truncated = True
break


# Truncate the value to fit within budget
truncated_val, val_truncated = self._truncate_value_to_budget(val, value_budget)

truncated_obj[key] = truncated_val
if val_truncated:
was_truncated = True

# Update remaining size
pair_size = key_size + self.calculate_json_size(truncated_val)
remaining_size -= pair_size

return truncated_obj, was_truncated or len(truncated_obj) < len(value)

def _truncate_item_to_budget(self, item: Any, budget: int) -> tuple[Any, bool]:
"""Truncate an array item to fit within a size budget."""
if isinstance(item, str):
# For strings, truncate to fit within budget (accounting for quotes)
max_chars = max(0, budget - 5) # -5 for quotes and potential "..."
max_chars = min(max_chars, ARRAY_CHAR_LIMIT)
if len(item) <= max_chars:
return item, False
return item[:max_chars] + "...", True
elif isinstance(item, dict):
# For objects, recursively truncate
return self._truncate_object(item, budget)
elif isinstance(item, list):
# For nested arrays, recursively truncate
return self._truncate_array(item, budget)
else:
# For other types, check if they fit
item_size = self.calculate_json_size(item)
if item_size <= budget:
return item, False
value = mapping[key]
if isinstance(value, Segment):
value_result = self._truncate_segment(value, value_budget)
else:
# Convert to string and truncate
str_item = str(item)
return self._truncate_item_to_budget(str_item, budget)
value_result = self._truncate_json_primitives(mapping[key], value_budget)

truncated_obj[key] = value_result.value
pair_size += value_result.value_size
used_size += pair_size

if value_result.truncated:
truncated = True

return _PartResult(truncated_obj, used_size, truncated)


def _truncate_value_to_budget(self, val: Any, budget: int) -> tuple[Any, bool]:
@overload
def _truncate_json_primitives(self, val: str, target_size: int) -> _PartResult[str]: ...

@overload
def _truncate_json_primitives(self, val: list, target_size: int) -> _PartResult[list]: ...

@overload
def _truncate_json_primitives(self, val: dict, target_size: int) -> _PartResult[dict]: ...

@overload
def _truncate_json_primitives(self, val: bool, target_size: int) -> _PartResult[bool]: ...

@overload
def _truncate_json_primitives(self, val: int, target_size: int) -> _PartResult[int]: ...

@overload
def _truncate_json_primitives(self, val: float, target_size: int) -> _PartResult[float]: ...

@overload
def _truncate_json_primitives(self, val: None, target_size: int) -> _PartResult[None]: ...

def _truncate_json_primitives(
self, val: str | list | dict | bool | int | float | None, target_size: int
) -> _PartResult[Any]:
"""Truncate a value within an object to fit within budget.""" """Truncate a value within an object to fit within budget."""
if isinstance(val, str): if isinstance(val, str):
# For strings, respect OBJECT_CHAR_LIMIT but also budget
max_chars = min(OBJECT_CHAR_LIMIT, max(0, budget - 5)) # -5 for quotes and "..."
if len(val) <= max_chars:
return val, False
return val[:max_chars] + "...", True
return self._truncate_string(val, target_size)
elif isinstance(val, list): elif isinstance(val, list):
return self._truncate_array(val, budget)
return self._truncate_array(val, target_size)
elif isinstance(val, dict): elif isinstance(val, dict):
return self._truncate_object(val, budget)
return self._truncate_object(val, target_size)
elif val is None or isinstance(val, (bool, int, float)):
return _PartResult(val, self.calculate_json_size(val), False)
else: else:
# For other types, check if they fit
val_size = self.calculate_json_size(val)
if val_size <= budget:
return val, False
else:
# Convert to string and truncate
return self._truncate_value_to_budget(str(val), budget)
raise AssertionError("this statement should be unreachable.")
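The `_truncate_object` rewrite above spreads a byte budget across an object's keys: each pair pays for its key and separators, and the leftover budget is split evenly among the pairs not yet emitted. A standalone sketch of that budgeting idea using plain dicts and strings (not the real `VariableTruncator`, and with the same length-plus-quotes size approximation as `calculate_json_size`):

# Standalone sketch of the per-key budgeting used by _truncate_object above.
def json_size(value) -> int:
    if isinstance(value, str):
        return len(value) + 2  # rough estimate: characters plus the two quotes
    return len(str(value))


def truncate_object(mapping: dict, target_size: int) -> tuple[dict, bool]:
    truncated_obj: dict = {}
    truncated = False
    used_size = 2  # account for "{}"
    keys = sorted(mapping)  # deterministic order, as in the diff
    for i, key in enumerate(keys):
        if used_size > target_size:
            truncated = True  # no room left; drop the remaining keys
            break
        pair_size = (1 if i > 0 else 0) + json_size(key) + 1  # comma, key, ":"
        remaining_pairs = len(keys) - i
        value_budget = max(0, (target_size - pair_size - used_size) // remaining_pairs)
        value = mapping[key]
        if isinstance(value, str) and json_size(value) > value_budget:
            value = value[: max(value_budget - 5, 0)] + "..."
            truncated = True
        truncated_obj[key] = value
        used_size += pair_size + json_size(value)
    return truncated_obj, truncated


print(truncate_object({"a": "x" * 50, "b": "y" * 50, "c": 1}, target_size=40))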

+ 2
- 1
api/services/workflow_draft_variable_service.py View File

FileSegment,
)
from core.variables.types import SegmentType
from core.variables.utils import dumps_with_segments
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from core.workflow.enums import SystemVariableKey
from core.workflow.nodes import NodeType
filename = f"{self._generate_filename(name)}.txt"
else:
# For other types, store as JSON
original_content_serialized = json.dumps(value_seg.value, ensure_ascii=False, separators=(",", ":"))
original_content_serialized = dumps_with_segments(value_seg.value, ensure_ascii=False)
content_type = "application/json"
filename = f"{self._generate_filename(name)}.json"



+ 3
- 3
api/tasks/remove_app_and_related_data_task.py View File

upload_file_ids.append(upload_file_id)
files_deleted += 1
except Exception as e:
logging.warning(f"Failed to delete storage object {storage_key}: {e}")
logging.exception("Failed to delete storage object %s", storage_key)
# Continue with database cleanup even if storage deletion fails
upload_file_ids.append(upload_file_id)

"""
conn.execute(sa.text(delete_variable_files_sql), {"file_ids": tuple(file_ids)})

except Exception as e:
logging.exception(f"Error deleting draft variable offload data: {e}")
except Exception:
logging.exception("Error deleting draft variable offload data:")
# Don't raise, as we want to continue with the main deletion process

return files_deleted
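Both hunks in this file replace f-string log messages with `logging.exception` and `%`-style arguments, which attaches the current traceback and defers formatting to the logging framework. A small, self-contained illustration:

import logging

logging.basicConfig(level=logging.INFO)
storage_key = "upload_files/123/example.txt"  # placeholder value

try:
    raise OSError("bucket unavailable")
except Exception:
    # f-string: the message is built eagerly and no traceback is attached.
    logging.warning(f"Failed to delete storage object {storage_key}")
    # %-style + exception(): formatting is deferred and the current
    # exception's traceback is appended to the log record.
    logging.exception("Failed to delete storage object %s", storage_key)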

+ 0
- 213
api/tests/integration_tests/test_offload.py View File

import uuid

import pytest
from sqlalchemy.orm import Session, joinedload, selectinload

from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from libs.uuid_utils import uuidv7
from models.enums import CreatorUserRole
from models.model import UploadFile
from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload, WorkflowNodeExecutionTriggeredFrom


@pytest.fixture
def session(flask_req_ctx):
with Session(bind=db.engine, expire_on_commit=False) as session:
yield session


def test_offload(session, setup_account):
tenant_id = str(uuid.uuid4())
app_id = str(uuid.uuid4())
# step 1: create a UploadFile
input_upload_file = UploadFile(
tenant_id=tenant_id,
storage_type="local",
key="fake_storage_key",
name="test_file.txt",
size=1024,
extension="txt",
mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=setup_account.id,
created_at=naive_utc_now(),
used=False,
)
output_upload_file = UploadFile(
tenant_id=tenant_id,
storage_type="local",
key="fake_storage_key",
name="test_file.txt",
size=1024,
extension="txt",
mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=setup_account.id,
created_at=naive_utc_now(),
used=False,
)
session.add(input_upload_file)
session.add(output_upload_file)
session.flush()

# step 2: create a WorkflowNodeExecutionModel
node_execution = WorkflowNodeExecutionModel(
id=str(uuid.uuid4()),
tenant_id=tenant_id,
app_id=app_id,
workflow_id=str(uuid.uuid4()),
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
index=1,
node_id="test_node_id",
node_type="test",
title="Test Node",
status="succeeded",
created_by_role=CreatorUserRole.ACCOUNT.value,
created_by=setup_account.id,
)
session.add(node_execution)
session.flush()

# step 3: create a WorkflowNodeExecutionOffload
offload = WorkflowNodeExecutionOffload(
id=uuidv7(),
tenant_id=tenant_id,
app_id=app_id,
node_execution_id=node_execution.id,
inputs_file_id=input_upload_file.id,
outputs_file_id=output_upload_file.id,
)
session.add(offload)
session.flush()

# Test preloading - this should work without raising LazyLoadError
result = (
session.query(WorkflowNodeExecutionModel)
.options(
selectinload(WorkflowNodeExecutionModel.offload_data).options(
joinedload(
WorkflowNodeExecutionOffload.inputs_file,
),
joinedload(
WorkflowNodeExecutionOffload.outputs_file,
),
)
)
.filter(WorkflowNodeExecutionModel.id == node_execution.id)
.first()
)

# Verify the relationships are properly loaded
assert result is not None
assert result.offload_data is not None
assert result.offload_data.inputs_file is not None
assert result.offload_data.inputs_file.id == input_upload_file.id
assert result.offload_data.inputs_file.name == "test_file.txt"

# Test the computed properties
assert result.inputs_truncated is True
assert result.outputs_truncated is False
assert False


def _test_offload_save(session, setup_account):
tenant_id = str(uuid.uuid4())
app_id = str(uuid.uuid4())
# step 1: create a UploadFile
input_upload_file = UploadFile(
tenant_id=tenant_id,
storage_type="local",
key="fake_storage_key",
name="test_file.txt",
size=1024,
extension="txt",
mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=setup_account.id,
created_at=naive_utc_now(),
used=False,
)
output_upload_file = UploadFile(
tenant_id=tenant_id,
storage_type="local",
key="fake_storage_key",
name="test_file.txt",
size=1024,
extension="txt",
mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=setup_account.id,
created_at=naive_utc_now(),
used=False,
)

node_execution_id = id = str(uuid.uuid4())

# step 3: create a WorkflowNodeExecutionOffload
offload = WorkflowNodeExecutionOffload(
id=uuidv7(),
tenant_id=tenant_id,
app_id=app_id,
node_execution_id=node_execution_id,
)
offload.inputs_file = input_upload_file
offload.outputs_file = output_upload_file

# step 2: create a WorkflowNodeExecutionModel
node_execution = WorkflowNodeExecutionModel(
id=str(uuid.uuid4()),
tenant_id=tenant_id,
app_id=app_id,
workflow_id=str(uuid.uuid4()),
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
index=1,
node_id="test_node_id",
node_type="test",
title="Test Node",
status="succeeded",
created_by_role=CreatorUserRole.ACCOUNT.value,
created_by=setup_account.id,
)
node_execution.offload_data = offload
session.add(node_execution)
session.flush()

assert False


"""
2025-08-21 15:34:49,570 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-08-21 15:34:49,572 INFO sqlalchemy.engine.Engine INSERT INTO upload_files (id, tenant_id, storage_type, key, name, size, extension, mime_type, created_by_role, created_by, created_at, used, used_by, used_at, hash, source_url) VALUES (%(id__0)s::UUID, %(tenant_id__0)s::UUID, %(storage_type__0)s, %(k ... 410 characters truncated ... (created_at__1)s, %(used__1)s, %(used_by__1)s::UUID, %(used_at__1)s, %(hash__1)s, %(source_url__1)s)
2025-08-21 15:34:49,572 INFO sqlalchemy.engine.Engine [generated in 0.00009s (insertmanyvalues) 1/1 (unordered)] {'created_at__0': datetime.datetime(2025, 8, 21, 15, 34, 49, 570482), 'id__0': '366621fa-4326-403e-8709-62e4d0de7367', 'storage_type__0': 'local', 'extension__0': 'txt', 'created_by__0': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'used_at__0': None, 'used_by__0': None, 'source_url__0': '', 'mime_type__0': 'text/plain', 'created_by_role__0': 'account', 'used__0': False, 'size__0': 1024, 'tenant_id__0': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'hash__0': None, 'key__0': 'fake_storage_key', 'name__0': 'test_file.txt', 'created_at__1': datetime.datetime(2025, 8, 21, 15, 34, 49, 570563), 'id__1': '3cdec641-a452-4df0-a9af-4a1a30c27ea5', 'storage_type__1': 'local', 'extension__1': 'txt', 'created_by__1': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'used_at__1': None, 'used_by__1': None, 'source_url__1': '', 'mime_type__1': 'text/plain', 'created_by_role__1': 'account', 'used__1': False, 'size__1': 1024, 'tenant_id__1': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'hash__1': None, 'key__1': 'fake_storage_key', 'name__1': 'test_file.txt'}
2025-08-21 15:34:49,576 INFO sqlalchemy.engine.Engine INSERT INTO workflow_node_executions (id, tenant_id, app_id, workflow_id, triggered_from, workflow_run_id, index, predecessor_node_id, node_execution_id, node_id, node_type, title, inputs, process_data, outputs, status, error, execution_metadata, created_by_role, created_by, finished_at) VALUES (%(id)s::UUID, %(tenant_id)s::UUID, %(app_id)s::UUID, %(workflow_id)s::UUID, %(triggered_from)s, %(workflow_run_id)s::UUID, %(index)s, %(predecessor_node_id)s, %(node_execution_id)s, %(node_id)s, %(node_type)s, %(title)s, %(inputs)s, %(process_data)s, %(outputs)s, %(status)s, %(error)s, %(execution_metadata)s, %(created_by_role)s, %(created_by)s::UUID, %(finished_at)s) RETURNING workflow_node_executions.elapsed_time, workflow_node_executions.created_at
2025-08-21 15:34:49,576 INFO sqlalchemy.engine.Engine [generated in 0.00019s] {'id': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'tenant_id': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'app_id': '79fa81c7-2760-40db-af54-74cb2fea2ce7', 'workflow_id': '95d341e3-381c-4c54-a383-f685a9741053', 'triggered_from': <WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN: 'workflow-run'>, 'workflow_run_id': None, 'index': 1, 'predecessor_node_id': None, 'node_execution_id': None, 'node_id': 'test_node_id', 'node_type': 'test', 'title': 'Test Node', 'inputs': None, 'process_data': None, 'outputs': None, 'status': 'succeeded', 'error': None, 'execution_metadata': None, 'created_by_role': 'account', 'created_by': 'ccc7657c-fb48-46bd-8f42-c837b14eab18', 'finished_at': None}
2025-08-21 15:34:49,579 INFO sqlalchemy.engine.Engine INSERT INTO workflow_node_execution_offload (id, created_at, tenant_id, app_id, node_execution_id, inputs_file_id, outputs_file_id) VALUES (%(id)s::UUID, %(created_at)s, %(tenant_id)s::UUID, %(app_id)s::UUID, %(node_execution_id)s::UUID, %(inputs_file_id)s::UUID, %(outputs_file_id)s::UUID)
2025-08-21 15:34:49,579 INFO sqlalchemy.engine.Engine [generated in 0.00016s] {'id': '0198cd44-b7ea-724b-9e1b-5f062a2ef45b', 'created_at': datetime.datetime(2025, 8, 21, 15, 34, 49, 579072), 'tenant_id': '4c1bbfc9-a28b-4d93-8987-45db78e3269c', 'app_id': '79fa81c7-2760-40db-af54-74cb2fea2ce7', 'node_execution_id': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'inputs_file_id': '366621fa-4326-403e-8709-62e4d0de7367', 'outputs_file_id': '3cdec641-a452-4df0-a9af-4a1a30c27ea5'}
2025-08-21 15:34:49,581 INFO sqlalchemy.engine.Engine SELECT workflow_node_executions.id AS workflow_node_executions_id, workflow_node_executions.tenant_id AS workflow_node_executions_tenant_id, workflow_node_executions.app_id AS workflow_node_executions_app_id, workflow_node_executions.workflow_id AS workflow_node_executions_workflow_id, workflow_node_executions.triggered_from AS workflow_node_executions_triggered_from, workflow_node_executions.workflow_run_id AS workflow_node_executions_workflow_run_id, workflow_node_executions.index AS workflow_node_executions_index, workflow_node_executions.predecessor_node_id AS workflow_node_executions_predecessor_node_id, workflow_node_executions.node_execution_id AS workflow_node_executions_node_execution_id, workflow_node_executions.node_id AS workflow_node_executions_node_id, workflow_node_executions.node_type AS workflow_node_executions_node_type, workflow_node_executions.title AS workflow_node_executions_title, workflow_node_executions.inputs AS workflow_node_executions_inputs, workflow_node_executions.process_data AS workflow_node_executions_process_data, workflow_node_executions.outputs AS workflow_node_executions_outputs, workflow_node_executions.status AS workflow_node_executions_status, workflow_node_executions.error AS workflow_node_executions_error, workflow_node_executions.elapsed_time AS workflow_node_executions_elapsed_time, workflow_node_executions.execution_metadata AS workflow_node_executions_execution_metadata, workflow_node_executions.created_at AS workflow_node_executions_created_at, workflow_node_executions.created_by_role AS workflow_node_executions_created_by_role, workflow_node_executions.created_by AS workflow_node_executions_created_by, workflow_node_executions.finished_at AS workflow_node_executions_finished_at
FROM workflow_node_executions
WHERE workflow_node_executions.id = %(id_1)s::UUID
LIMIT %(param_1)s
2025-08-21 15:34:49,581 INFO sqlalchemy.engine.Engine [generated in 0.00009s] {'id_1': '9aac28b6-b6fc-4aea-abdf-21da3227e621', 'param_1': 1}
2025-08-21 15:34:49,585 INFO sqlalchemy.engine.Engine SELECT workflow_node_execution_offload.node_execution_id AS workflow_node_execution_offload_node_execution_id, workflow_node_execution_offload.id AS workflow_node_execution_offload_id, workflow_node_execution_offload.created_at AS workflow_node_execution_offload_created_at, workflow_node_execution_offload.tenant_id AS workflow_node_execution_offload_tenant_id, workflow_node_execution_offload.app_id AS workflow_node_execution_offload_app_id, workflow_node_execution_offload.inputs_file_id AS workflow_node_execution_offload_inputs_file_id, workflow_node_execution_offload.outputs_file_id AS workflow_node_execution_offload_outputs_file_id
FROM workflow_node_execution_offload
WHERE workflow_node_execution_offload.node_execution_id IN (%(primary_keys_1)s::UUID)
2025-08-21 15:34:49,585 INFO sqlalchemy.engine.Engine [generated in 0.00021s] {'primary_keys_1': '9aac28b6-b6fc-4aea-abdf-21da3227e621'}
2025-08-21 15:34:49,587 INFO sqlalchemy.engine.Engine SELECT upload_files.id AS upload_files_id, upload_files.tenant_id AS upload_files_tenant_id, upload_files.storage_type AS upload_files_storage_type, upload_files.key AS upload_files_key, upload_files.name AS upload_files_name, upload_files.size AS upload_files_size, upload_files.extension AS upload_files_extension, upload_files.mime_type AS upload_files_mime_type, upload_files.created_by_role AS upload_files_created_by_role, upload_files.created_by AS upload_files_created_by, upload_files.created_at AS upload_files_created_at, upload_files.used AS upload_files_used, upload_files.used_by AS upload_files_used_by, upload_files.used_at AS upload_files_used_at, upload_files.hash AS upload_files_hash, upload_files.source_url AS upload_files_source_url
FROM upload_files
WHERE upload_files.id IN (%(primary_keys_1)s::UUID)
2025-08-21 15:34:49,587 INFO sqlalchemy.engine.Engine [generated in 0.00012s] {'primary_keys_1': '3cdec641-a452-4df0-a9af-4a1a30c27ea5'}
2025-08-21 15:34:49,588 INFO sqlalchemy.engine.Engine SELECT upload_files.id AS upload_files_id, upload_files.tenant_id AS upload_files_tenant_id, upload_files.storage_type AS upload_files_storage_type, upload_files.key AS upload_files_key, upload_files.name AS upload_files_name, upload_files.size AS upload_files_size, upload_files.extension AS upload_files_extension, upload_files.mime_type AS upload_files_mime_type, upload_files.created_by_role AS upload_files_created_by_role, upload_files.created_by AS upload_files_created_by, upload_files.created_at AS upload_files_created_at, upload_files.used AS upload_files_used, upload_files.used_by AS upload_files_used_by, upload_files.used_at AS upload_files_used_at, upload_files.hash AS upload_files_hash, upload_files.source_url AS upload_files_source_url
FROM upload_files
WHERE upload_files.id IN (%(primary_keys_1)s::UUID)
2025-08-21 15:34:49,588 INFO sqlalchemy.engine.Engine [generated in 0.00010s] {'primary_keys_1': '366621fa-4326-403e-8709-62e4d0de7367'}
"""


"""
upload_file_id: 366621fa-4326-403e-8709-62e4d0de7367 3cdec641-a452-4df0-a9af-4a1a30c27ea5

workflow_node_executions_id: 9aac28b6-b6fc-4aea-abdf-21da3227e621

offload_id: 0198cd44-b7ea-724b-9e1b-5f062a2ef45b
"""

+ 77
- 90
api/tests/integration_tests/workflow/test_process_data_truncation_integration.py View File

@dataclass
class TruncationTestData:
"""Test data for truncation scenarios."""

name: str
process_data: dict[str, any]
should_truncate: bool

class TestProcessDataTruncationIntegration:
"""Integration tests for process_data truncation functionality."""
@pytest.fixture
def in_memory_db_engine(self):
"""Create an in-memory SQLite database for testing."""
engine = create_engine("sqlite:///:memory:")
# Create minimal table structure for testing
with engine.connect() as conn:
# Create workflow_node_executions table
conn.execute(text("""
conn.execute(
text("""
CREATE TABLE workflow_node_executions (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
created_by TEXT NOT NULL,
finished_at DATETIME
)
"""))
""")
)

# Create workflow_node_execution_offload table
conn.execute(text("""
conn.execute(
text("""
CREATE TABLE workflow_node_execution_offload (
id TEXT PRIMARY KEY,
created_at DATETIME NOT NULL,
outputs_file_id TEXT,
process_data_file_id TEXT
)
"""))
""")
)

# Create upload_files table (simplified)
conn.execute(text("""
conn.execute(
text("""
CREATE TABLE upload_files (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
size INTEGER NOT NULL,
created_at DATETIME NOT NULL
)
"""))
""")
)

conn.commit()
return engine

@pytest.fixture
def repository(self, in_memory_db_engine, mock_account):
"""Create a repository instance for testing."""
session_factory = sessionmaker(bind=in_memory_db_engine)
return SQLAlchemyWorkflowNodeExecutionRepository(
session_factory=session_factory,
user=mock_account,
)

def create_test_execution(
self,
process_data: dict[str, any] | None = None,
execution_id: str = "test-execution-id"
self, process_data: dict[str, any] | None = None, execution_id: str = "test-execution-id"
) -> WorkflowNodeExecution:
"""Create a test execution with process_data."""
return WorkflowNodeExecution(
"logs": ["log entry"] * 500, # Large array
"config": {"setting": "value"},
"status": "processing",
"details": {"description": "y" * 5000} # Large string
"details": {"description": "y" * 5000}, # Large string
},
should_truncate=True,
expected_storage_interaction=True,
),
]


@patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config')
@patch('services.file_service.FileService.upload_file')
@patch('extensions.ext_storage.storage')
def test_end_to_end_process_data_truncation(
self,
mock_storage,
mock_upload_file,
mock_config,
repository
):
@patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
@patch("services.file_service.FileService.upload_file")
@patch("extensions.ext_storage.storage")
def test_end_to_end_process_data_truncation(self, mock_storage, mock_upload_file, mock_config, repository):
"""Test end-to-end process_data truncation functionality.""" """Test end-to-end process_data truncation functionality."""
# Configure truncation limits # Configure truncation limits
mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500

# Create large process_data that should be truncated # Create large process_data that should be truncated
large_process_data = { large_process_data = {
"large_field": "x" * 10000, # Exceeds string length limit "large_field": "x" * 10000, # Exceeds string length limit
"metadata": {"type": "processing", "timestamp": 1234567890}
"metadata": {"type": "processing", "timestamp": 1234567890},
} }
# Mock file upload # Mock file upload
mock_file = Mock() mock_file = Mock()
mock_file.id = "mock-process-data-file-id" mock_file.id = "mock-process-data-file-id"
mock_upload_file.return_value = mock_file mock_upload_file.return_value = mock_file
# Create and save execution # Create and save execution
execution = self.create_test_execution(process_data=large_process_data) execution = self.create_test_execution(process_data=large_process_data)
repository.save(execution) repository.save(execution)
# Verify truncation occurred # Verify truncation occurred
assert execution.process_data_truncated is True assert execution.process_data_truncated is True
truncated_data = execution.get_truncated_process_data() truncated_data = execution.get_truncated_process_data()
assert truncated_data is not None assert truncated_data is not None
assert truncated_data != large_process_data # Should be different due to truncation assert truncated_data != large_process_data # Should be different due to truncation
# Verify file upload was called for process_data # Verify file upload was called for process_data
assert mock_upload_file.called assert mock_upload_file.called
upload_args = mock_upload_file.call_args upload_args = mock_upload_file.call_args
assert "_process_data" in upload_args[1]["filename"] assert "_process_data" in upload_args[1]["filename"]


@patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config')
@patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
def test_small_process_data_no_truncation(self, mock_config, repository):
"""Test that small process_data is not truncated."""
# Configure truncation limits
mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500
# Create small process_data
small_process_data = {"small": "data", "count": 5}
execution = self.create_test_execution(process_data=small_process_data)
repository.save(execution)
# Verify no truncation occurred
assert execution.process_data_truncated is False
assert execution.get_truncated_process_data() is None
assert execution.get_response_process_data() == small_process_data


@pytest.mark.parametrize("test_data", [
data for data in get_truncation_test_data(None)
], ids=[data.name for data in get_truncation_test_data(None)])
@patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config')
@patch('services.file_service.FileService.upload_file')
@pytest.mark.parametrize(
"test_data",
get_truncation_test_data(None),
ids=[data.name for data in get_truncation_test_data(None)],
)
@patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
@patch("services.file_service.FileService.upload_file")
def test_various_truncation_scenarios(
self,
mock_upload_file,
mock_config,
test_data: TruncationTestData,
repository
self, mock_upload_file, mock_config, test_data: TruncationTestData, repository
):
"""Test various process_data truncation scenarios."""
# Configure truncation limits
mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500
if test_data.expected_storage_interaction:
# Mock file upload for truncation scenarios
mock_file = Mock()
mock_file.id = f"file-{test_data.name}"
mock_upload_file.return_value = mock_file
execution = self.create_test_execution(process_data=test_data.process_data)
repository.save(execution)
# Verify truncation behavior matches expectations
assert execution.process_data_truncated == test_data.should_truncate
if test_data.should_truncate:
assert execution.get_truncated_process_data() is not None
assert execution.get_truncated_process_data() != test_data.process_data
assert execution.get_truncated_process_data() is None
assert execution.get_response_process_data() == test_data.process_data


@patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config')
@patch('services.file_service.FileService.upload_file')
@patch('extensions.ext_storage.storage')
@patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
@patch("services.file_service.FileService.upload_file")
@patch("extensions.ext_storage.storage")
def test_load_truncated_execution_from_database( def test_load_truncated_execution_from_database(
self,
mock_storage,
mock_upload_file,
mock_config,
repository,
in_memory_db_engine
self, mock_storage, mock_upload_file, mock_config, repository, in_memory_db_engine
): ):
"""Test loading an execution with truncated process_data from database.""" """Test loading an execution with truncated process_data from database."""
# Configure truncation # Configure truncation
mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500
# Create and save execution with large process_data # Create and save execution with large process_data
large_process_data = {
"large_field": "x" * 10000,
"metadata": "info"
}
large_process_data = {"large_field": "x" * 10000, "metadata": "info"}

# Mock file upload # Mock file upload
mock_file = Mock() mock_file = Mock()
mock_file.id = "process-data-file-id" mock_file.id = "process-data-file-id"
mock_upload_file.return_value = mock_file mock_upload_file.return_value = mock_file
execution = self.create_test_execution(process_data=large_process_data) execution = self.create_test_execution(process_data=large_process_data)
repository.save(execution) repository.save(execution)
# Mock storage load for reconstruction # Mock storage load for reconstruction
mock_storage.load.return_value = json.dumps(large_process_data).encode() mock_storage.load.return_value = json.dumps(large_process_data).encode()
# Create a new repository instance to simulate fresh load # Create a new repository instance to simulate fresh load
session_factory = sessionmaker(bind=in_memory_db_engine) session_factory = sessionmaker(bind=in_memory_db_engine)
new_repository = SQLAlchemyWorkflowNodeExecutionRepository( new_repository = SQLAlchemyWorkflowNodeExecutionRepository(
app_id="test-app-id", app_id="test-app-id",
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
) )
# Load executions from database # Load executions from database
executions = new_repository.get_by_workflow_run("test-run-id") executions = new_repository.get_by_workflow_run("test-run-id")
assert len(executions) == 1 assert len(executions) == 1
loaded_execution = executions[0] loaded_execution = executions[0]
# Verify that full data is loaded # Verify that full data is loaded
assert loaded_execution.process_data == large_process_data assert loaded_execution.process_data == large_process_data
assert loaded_execution.process_data_truncated is True assert loaded_execution.process_data_truncated is True
# Verify truncated data for responses # Verify truncated data for responses
response_data = loaded_execution.get_response_process_data() response_data = loaded_execution.get_response_process_data()
assert response_data != large_process_data # Should be truncated version assert response_data != large_process_data # Should be truncated version
"""Test handling of None process_data.""" """Test handling of None process_data."""
execution = self.create_test_execution(process_data=None) execution = self.create_test_execution(process_data=None)
repository.save(execution) repository.save(execution)
# Should handle None gracefully # Should handle None gracefully
assert execution.process_data is None assert execution.process_data is None
assert execution.process_data_truncated is False assert execution.process_data_truncated is False
"""Test handling of empty process_data.""" """Test handling of empty process_data."""
execution = self.create_test_execution(process_data={}) execution = self.create_test_execution(process_data={})
repository.save(execution) repository.save(execution)
# Should handle empty dict gracefully # Should handle empty dict gracefully
assert execution.process_data == {} assert execution.process_data == {}
assert execution.process_data_truncated is False assert execution.process_data_truncated is False
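The repository tests above all pin the truncation thresholds by patching dify_config at the point where the repository module imports it. A minimal sketch of that pattern, assuming the project's test environment (the patch target and attribute names are copied from the tests above; the surrounding test body is omitted):

from unittest.mock import patch

with patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config") as cfg:
    # Thresholds the truncator consults when deciding whether to offload a value.
    cfg.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
    cfg.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
    cfg.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500
    # repository.save(execution) executed inside this block sees the patched limits.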


class TestProcessDataTruncationApiIntegration: class TestProcessDataTruncationApiIntegration:
"""Integration tests for API responses with process_data truncation.""" """Integration tests for API responses with process_data truncation."""
def test_api_response_includes_truncated_flag(self): def test_api_response_includes_truncated_flag(self):
"""Test that API responses include the process_data_truncated flag.""" """Test that API responses include the process_data_truncated flag."""
from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter
from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity
from core.app.entities.queue_entities import QueueNodeSucceededEvent from core.app.entities.queue_entities import QueueNodeSucceededEvent
# Create execution with truncated process_data # Create execution with truncated process_data
execution = WorkflowNodeExecution( execution = WorkflowNodeExecution(
id="test-execution-id", id="test-execution-id",
created_at=datetime.now(), created_at=datetime.now(),
finished_at=datetime.now(), finished_at=datetime.now(),
) )
# Set truncated data # Set truncated data
execution.set_truncated_process_data({"large": "[TRUNCATED]"}) execution.set_truncated_process_data({"large": "[TRUNCATED]"})
# Create converter and event # Create converter and event
converter = WorkflowResponseConverter( converter = WorkflowResponseConverter(
application_generate_entity=Mock(
spec=WorkflowAppGenerateEntity,
app_config=Mock(tenant_id="test-tenant")
)
application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant"))
) )

event = QueueNodeSucceededEvent( event = QueueNodeSucceededEvent(
node_id="test-node-id", node_id="test-node-id",
node_type=NodeType.LLM, node_type=NodeType.LLM,
in_iteration_id=None, in_iteration_id=None,
in_loop_id=None, in_loop_id=None,
) )
# Generate response # Generate response
response = converter.workflow_node_finish_to_stream_response( response = converter.workflow_node_finish_to_stream_response(
event=event, event=event,
task_id="test-task-id", task_id="test-task-id",
workflow_node_execution=execution, workflow_node_execution=execution,
) )
# Verify response includes truncated flag and data # Verify response includes truncated flag and data
assert response is not None assert response is not None
assert response.data.process_data_truncated is True assert response.data.process_data_truncated is True
assert response.data.process_data == {"large": "[TRUNCATED]"} assert response.data.process_data == {"large": "[TRUNCATED]"}
# Verify response can be serialized # Verify response can be serialized
response_dict = response.to_dict() response_dict = response.to_dict()
assert "process_data_truncated" in response_dict["data"] assert "process_data_truncated" in response_dict["data"]
def test_workflow_run_fields_include_truncated_flag(self): def test_workflow_run_fields_include_truncated_flag(self):
"""Test that workflow run fields include process_data_truncated.""" """Test that workflow run fields include process_data_truncated."""
from fields.workflow_run_fields import workflow_run_node_execution_fields from fields.workflow_run_fields import workflow_run_node_execution_fields
# Verify the field is included in the definition # Verify the field is included in the definition
assert "process_data_truncated" in workflow_run_node_execution_fields assert "process_data_truncated" in workflow_run_node_execution_fields
# The field should be a Boolean field # The field should be a Boolean field
field = workflow_run_node_execution_fields["process_data_truncated"] field = workflow_run_node_execution_fields["process_data_truncated"]
from flask_restful import fields from flask_restful import fields
assert isinstance(field, fields.Boolean)

assert isinstance(field, fields.Boolean)
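A self-contained sketch of how a flask_restful field map can expose the truncation flag next to the payload, as the assertion above expects; the names are illustrative and this is not the project's actual workflow_run_fields definition:

from flask_restful import fields, marshal

node_execution_fields = {
    "process_data": fields.Raw,
    "process_data_truncated": fields.Boolean,
}

record = {"process_data": {"large": "[TRUNCATED]"}, "process_data_truncated": True}
# marshal() applies the field map, so the Boolean flag survives serialization.
print(marshal(record, node_execution_fields))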

+ 30
- 29
api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py

from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter
from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity
from core.app.entities.queue_entities import QueueNodeRetryEvent, QueueNodeSucceededEvent from core.app.entities.queue_entities import QueueNodeRetryEvent, QueueNodeSucceededEvent
from core.helper.code_executor.code_executor import CodeLanguage
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
from core.workflow.nodes.code.entities import CodeNodeData
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from libs.datetime_utils import naive_utc_now from libs.datetime_utils import naive_utc_now
from models import Account




@dataclass @dataclass


def create_workflow_response_converter(self) -> WorkflowResponseConverter: def create_workflow_response_converter(self) -> WorkflowResponseConverter:
"""Create a WorkflowResponseConverter for testing.""" """Create a WorkflowResponseConverter for testing."""

mock_entity = self.create_mock_generate_entity() mock_entity = self.create_mock_generate_entity()
return WorkflowResponseConverter(application_generate_entity=mock_entity)
mock_user = Mock(spec=Account)
mock_user.id = "test-user-id"
mock_user.name = "Test User"
mock_user.email = "test@example.com"

return WorkflowResponseConverter(application_generate_entity=mock_entity, user=mock_user)


def create_workflow_node_execution( def create_workflow_node_execution(
self, self,
return QueueNodeSucceededEvent( return QueueNodeSucceededEvent(
node_id="test-node-id", node_id="test-node-id",
node_type=NodeType.CODE, node_type=NodeType.CODE,
node_data=CodeNodeData(
title="test code",
variables=[],
code_language=CodeLanguage.PYTHON3,
code="",
outputs={},
),
node_execution_id=str(uuid.uuid4()), node_execution_id=str(uuid.uuid4()),
start_at=naive_utc_now(), start_at=naive_utc_now(),
parallel_id=None, parallel_id=None,
retry_index=1, retry_index=1,
node_id="test-node-id", node_id="test-node-id",
node_type=NodeType.CODE, node_type=NodeType.CODE,
node_data=CodeNodeData(
title="test code",
variables=[],
code_language=CodeLanguage.PYTHON3,
code="",
outputs={},
),
node_title="test code",
provider_type="built-in",
provider_id="code",
node_execution_id=str(uuid.uuid4()), node_execution_id=str(uuid.uuid4()),
start_at=naive_utc_now(), start_at=naive_utc_now(),
parallel_id=None, parallel_id=None,


@pytest.mark.parametrize( @pytest.mark.parametrize(
"scenario", "scenario",
[scenario for scenario in get_process_data_response_scenarios()],
get_process_data_response_scenarios(),
ids=[scenario.name for scenario in get_process_data_response_scenarios()], ids=[scenario.name for scenario in get_process_data_response_scenarios()],
) )
def test_node_finish_response_scenarios(self, scenario: ProcessDataResponseScenario): def test_node_finish_response_scenarios(self, scenario: ProcessDataResponseScenario):
"""Test various scenarios for node finish responses.""" """Test various scenarios for node finish responses."""

mock_user = Mock(spec=Account)
mock_user.id = "test-user-id"
mock_user.name = "Test User"
mock_user.email = "test@example.com"

converter = WorkflowResponseConverter( converter = WorkflowResponseConverter(
application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant"))
application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")),
user=mock_user,
) )


execution = WorkflowNodeExecution( execution = WorkflowNodeExecution(
event = QueueNodeSucceededEvent( event = QueueNodeSucceededEvent(
node_id="test-node-id", node_id="test-node-id",
node_type=NodeType.CODE, node_type=NodeType.CODE,
node_data=CodeNodeData(
title="test code",
variables=[],
code_language=CodeLanguage.PYTHON3,
code="",
outputs={},
),
node_execution_id=str(uuid.uuid4()), node_execution_id=str(uuid.uuid4()),
start_at=naive_utc_now(), start_at=naive_utc_now(),
parallel_id=None, parallel_id=None,


@pytest.mark.parametrize( @pytest.mark.parametrize(
"scenario", "scenario",
[scenario for scenario in get_process_data_response_scenarios()],
get_process_data_response_scenarios(),
ids=[scenario.name for scenario in get_process_data_response_scenarios()], ids=[scenario.name for scenario in get_process_data_response_scenarios()],
) )
def test_node_retry_response_scenarios(self, scenario: ProcessDataResponseScenario): def test_node_retry_response_scenarios(self, scenario: ProcessDataResponseScenario):
"""Test various scenarios for node retry responses.""" """Test various scenarios for node retry responses."""

mock_user = Mock(spec=Account)
mock_user.id = "test-user-id"
mock_user.name = "Test User"
mock_user.email = "test@example.com"

converter = WorkflowResponseConverter( converter = WorkflowResponseConverter(
application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant"))
application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")),
user=mock_user,
) )


execution = WorkflowNodeExecution( execution = WorkflowNodeExecution(

+ 1
- 1
api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py

WorkflowNodeExecution, WorkflowNodeExecution,
WorkflowNodeExecutionStatus, WorkflowNodeExecutionStatus,
) )
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from models import Account, WorkflowNodeExecutionTriggeredFrom from models import Account, WorkflowNodeExecutionTriggeredFrom
from models.enums import ExecutionOffLoadType from models.enums import ExecutionOffLoadType
from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload

+ 12
- 10
api/tests/unit_tests/core/schemas/test_resolver.py



# Should be resolved to the actual qa_structure schema # Should be resolved to the actual qa_structure schema
assert resolved["type"] == "object" assert resolved["type"] == "object"
assert resolved["title"] == "Q&A Structure Schema"
assert resolved["title"] == "Q&A Structure"
assert "qa_chunks" in resolved["properties"] assert "qa_chunks" in resolved["properties"]
assert resolved["properties"]["qa_chunks"]["type"] == "array" assert resolved["properties"]["qa_chunks"]["type"] == "array"


# $ref should be resolved # $ref should be resolved
file_schema = resolved["properties"]["file_data"] file_schema = resolved["properties"]["file_data"]
assert file_schema["type"] == "object" assert file_schema["type"] == "object"
assert file_schema["title"] == "File Schema"
assert file_schema["title"] == "File"
assert "name" in file_schema["properties"] assert "name" in file_schema["properties"]


# Metadata fields should be removed from resolved schema # Metadata fields should be removed from resolved schema
# Items $ref should be resolved # Items $ref should be resolved
items_schema = resolved["items"] items_schema = resolved["items"]
assert items_schema["type"] == "array" assert items_schema["type"] == "array"
assert items_schema["title"] == "General Structure Schema"
assert items_schema["title"] == "General Structure"


def test_non_dify_ref_unchanged(self): def test_non_dify_ref_unchanged(self):
"""Test that non-Dify $refs are left unchanged""" """Test that non-Dify $refs are left unchanged"""


# Dify $ref should be resolved # Dify $ref should be resolved
assert resolved["properties"]["dify_data"]["type"] == "object" assert resolved["properties"]["dify_data"]["type"] == "object"
assert resolved["properties"]["dify_data"]["title"] == "File Schema"
assert resolved["properties"]["dify_data"]["title"] == "File"


def test_no_refs_schema_unchanged(self): def test_no_refs_schema_unchanged(self):
"""Test that schemas without $refs are returned unchanged""" """Test that schemas without $refs are returned unchanged"""


# Check refs are resolved # Check refs are resolved
assert resolved["properties"]["files"]["items"]["type"] == "object" assert resolved["properties"]["files"]["items"]["type"] == "object"
assert resolved["properties"]["files"]["items"]["title"] == "File Schema"
assert resolved["properties"]["files"]["items"]["title"] == "File"
assert resolved["properties"]["nested"]["properties"]["qa"]["type"] == "object" assert resolved["properties"]["nested"]["properties"]["qa"]["type"] == "object"
assert resolved["properties"]["nested"]["properties"]["qa"]["title"] == "Q&A Structure Schema"
assert resolved["properties"]["nested"]["properties"]["qa"]["title"] == "Q&A Structure"




class TestUtilityFunctions: class TestUtilityFunctions:
assert isinstance(resolved, list) assert isinstance(resolved, list)
assert len(resolved) == 3 assert len(resolved) == 3
assert resolved[0]["type"] == "object" assert resolved[0]["type"] == "object"
assert resolved[0]["title"] == "File Schema"
assert resolved[0]["title"] == "File"
assert resolved[1] == {"type": "string"} assert resolved[1] == {"type": "string"}
assert resolved[2]["type"] == "object" assert resolved[2]["type"] == "object"
assert resolved[2]["title"] == "Q&A Structure Schema"
assert resolved[2]["title"] == "Q&A Structure"


def test_cache_performance(self): def test_cache_performance(self):
"""Test that caching improves performance""" """Test that caching improves performance"""


# Cache should make it faster (more lenient check) # Cache should make it faster (more lenient check)
assert result1 == result2 assert result1 == result2
# Cache should provide some performance benefit
assert avg_time_with_cache <= avg_time_no_cache
# Cache should provide some performance benefit (allow for measurement variance)
# We expect cache to be faster, but allow for small timing variations
performance_ratio = avg_time_with_cache / avg_time_no_cache if avg_time_no_cache > 0 else 1.0
assert performance_ratio <= 2.0, f"Cache performance degraded too much: {performance_ratio}"
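The loosened cache assertion above tolerates timer noise and guards the division; in isolation the pattern looks like this (timeit and the stand-in resolve() are assumptions, not the project's benchmark harness):

import timeit

def resolve(schema):
    # Stand-in for the resolver under test.
    return schema

avg_time_no_cache = timeit.timeit(lambda: resolve({"type": "object"}), number=10_000)
avg_time_with_cache = timeit.timeit(lambda: resolve({"type": "object"}), number=10_000)
performance_ratio = avg_time_with_cache / avg_time_no_cache if avg_time_no_cache > 0 else 1.0
assert performance_ratio <= 2.0, f"Cache performance degraded too much: {performance_ratio}"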


def test_fast_path_performance_no_refs(self): def test_fast_path_performance_no_refs(self):
"""Test that schemas without $refs use fast path and avoid deep copying""" """Test that schemas without $refs use fast path and avoid deep copying"""

+ 2
- 2
api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py

import pytest import pytest


from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType




class TestWorkflowNodeExecutionProcessDataTruncation: class TestWorkflowNodeExecutionProcessDataTruncation:


@pytest.mark.parametrize( @pytest.mark.parametrize(
"scenario", "scenario",
[scenario for scenario in get_process_data_scenarios(None)],
get_process_data_scenarios(None),
ids=[scenario.name for scenario in get_process_data_scenarios(None)], ids=[scenario.name for scenario in get_process_data_scenarios(None)],
) )
def test_process_data_scenarios(self, scenario: ProcessDataScenario): def test_process_data_scenarios(self, scenario: ProcessDataScenario):
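The scenario-based parametrization above follows a dataclass-plus-ids pattern; a minimal, standalone version of it (names are illustrative, not the project's fixtures):

from dataclasses import dataclass

import pytest

@dataclass
class Scenario:
    name: str
    payload: dict

def get_scenarios() -> list[Scenario]:
    return [Scenario("empty", {}), Scenario("simple", {"k": "v"})]

# ids come from the scenario names, so failures report a readable case name.
@pytest.mark.parametrize("scenario", get_scenarios(), ids=[s.name for s in get_scenarios()])
def test_scenario_payload(scenario: Scenario):
    assert isinstance(scenario.payload, dict)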

+ 66
- 35
api/tests/unit_tests/models/test_workflow_node_execution_offload.py

from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload




class TestWorkflowNodeExecutionOffload:
"""Test WorkflowNodeExecutionOffload model with process_data fields."""

def test_get_exe(self):
WorkflowNodeExecutionOffload


class TestWorkflowNodeExecutionModel: class TestWorkflowNodeExecutionModel:
"""Test WorkflowNodeExecutionModel with process_data truncation features.""" """Test WorkflowNodeExecutionModel with process_data truncation features."""


def test_process_data_truncated_property_false_when_no_offload_data(self): def test_process_data_truncated_property_false_when_no_offload_data(self):
"""Test process_data_truncated returns False when no offload_data.""" """Test process_data_truncated returns False when no offload_data."""
execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()
execution.offload_data = None
execution.offload_data = []


assert execution.process_data_truncated is False assert execution.process_data_truncated is False


def test_process_data_truncated_property_false_when_no_process_data_file(self): def test_process_data_truncated_property_false_when_no_process_data_file(self):
"""Test process_data_truncated returns False when no process_data file.""" """Test process_data_truncated returns False when no process_data file."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()


# Create real offload instance
offload_data = WorkflowNodeExecutionOffload()
offload_data.inputs_file_id = "inputs-file"
offload_data.outputs_file_id = "outputs-file"
offload_data.process_data_file_id = None # No process_data file
execution.offload_data = offload_data
# Create real offload instances for inputs and outputs but not process_data
inputs_offload = WorkflowNodeExecutionOffload()
inputs_offload.type_ = ExecutionOffLoadType.INPUTS
inputs_offload.file_id = "inputs-file"

outputs_offload = WorkflowNodeExecutionOffload()
outputs_offload.type_ = ExecutionOffLoadType.OUTPUTS
outputs_offload.file_id = "outputs-file"

execution.offload_data = [inputs_offload, outputs_offload]


assert execution.process_data_truncated is False assert execution.process_data_truncated is False


def test_process_data_truncated_property_true_when_process_data_file_exists(self): def test_process_data_truncated_property_true_when_process_data_file_exists(self):
"""Test process_data_truncated returns True when process_data file exists.""" """Test process_data_truncated returns True when process_data file exists."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()


# Create a real offload instance rather than mock
offload_data = WorkflowNodeExecutionOffload()
offload_data.process_data_file_id = "process-data-file-id"
execution.offload_data = offload_data
# Create a real offload instance for process_data
process_data_offload = WorkflowNodeExecutionOffload()
process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA
process_data_offload.file_id = "process-data-file-id"
execution.offload_data = [process_data_offload]


assert execution.process_data_truncated is True assert execution.process_data_truncated is True


def test_load_full_process_data_with_no_offload_data(self): def test_load_full_process_data_with_no_offload_data(self):
"""Test load_full_process_data when no offload data exists.""" """Test load_full_process_data when no offload data exists."""
execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()
execution.offload_data = None
execution.process_data_dict = {"test": "data"}
execution.offload_data = []
execution.process_data = '{"test": "data"}'


# Mock session and storage # Mock session and storage
mock_session = Mock() mock_session = Mock()


def test_load_full_process_data_with_no_file(self): def test_load_full_process_data_with_no_file(self):
"""Test load_full_process_data when no process_data file exists.""" """Test load_full_process_data when no process_data file exists."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()
execution.offload_data = self.create_mock_offload_data(process_data_file_id=None)
execution.process_data_dict = {"test": "data"}

# Create offload data for inputs only, not process_data
inputs_offload = WorkflowNodeExecutionOffload()
inputs_offload.type_ = ExecutionOffLoadType.INPUTS
inputs_offload.file_id = "inputs-file"

execution.offload_data = [inputs_offload]
execution.process_data = '{"test": "data"}'


# Mock session and storage # Mock session and storage
mock_session = Mock() mock_session = Mock()


def test_load_full_process_data_with_file(self): def test_load_full_process_data_with_file(self):
"""Test load_full_process_data when process_data file exists.""" """Test load_full_process_data when process_data file exists."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()
offload_data = self.create_mock_offload_data(process_data_file_id="file-id")
execution.offload_data = offload_data
execution.process_data_dict = {"truncated": "data"}

# Create process_data offload
process_data_offload = WorkflowNodeExecutionOffload()
process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA
process_data_offload.file_id = "file-id"

execution.offload_data = [process_data_offload]
execution.process_data = '{"truncated": "data"}'


# Mock session and storage # Mock session and storage
mock_session = Mock() mock_session = Mock()


def test_consistency_with_inputs_outputs_truncation(self): def test_consistency_with_inputs_outputs_truncation(self):
"""Test that process_data truncation behaves consistently with inputs/outputs.""" """Test that process_data truncation behaves consistently with inputs/outputs."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()


# Test all three truncation properties together
offload_data = self.create_mock_offload_data(
inputs_file_id="inputs-file", outputs_file_id="outputs-file", process_data_file_id="process-data-file"
)
execution.offload_data = offload_data
# Create offload data for all three types
inputs_offload = WorkflowNodeExecutionOffload()
inputs_offload.type_ = ExecutionOffLoadType.INPUTS
inputs_offload.file_id = "inputs-file"

outputs_offload = WorkflowNodeExecutionOffload()
outputs_offload.type_ = ExecutionOffLoadType.OUTPUTS
outputs_offload.file_id = "outputs-file"


# All should be truncated
process_data_offload = WorkflowNodeExecutionOffload()
process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA
process_data_offload.file_id = "process-data-file"

execution.offload_data = [inputs_offload, outputs_offload, process_data_offload]

# All three should be truncated
assert execution.inputs_truncated is True assert execution.inputs_truncated is True
assert execution.outputs_truncated is True assert execution.outputs_truncated is True
assert execution.process_data_truncated is True assert execution.process_data_truncated is True


def test_mixed_truncation_states(self): def test_mixed_truncation_states(self):
"""Test mixed states of truncation.""" """Test mixed states of truncation."""
from models.enums import ExecutionOffLoadType

execution = WorkflowNodeExecutionModel() execution = WorkflowNodeExecutionModel()


# Only process_data is truncated # Only process_data is truncated
offload_data = self.create_mock_offload_data(
inputs_file_id=None, outputs_file_id=None, process_data_file_id="process-data-file"
)
execution.offload_data = offload_data
process_data_offload = WorkflowNodeExecutionOffload()
process_data_offload.type_ = ExecutionOffLoadType.PROCESS_DATA
process_data_offload.file_id = "process-data-file"

execution.offload_data = [process_data_offload]


assert execution.inputs_truncated is False assert execution.inputs_truncated is False
assert execution.outputs_truncated is False assert execution.outputs_truncated is False
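The tests above model offload_data as a list of typed rows rather than a single record; a simplified stand-in showing how a truncated-flag property can be derived from such a list (these classes are illustrative, not WorkflowNodeExecutionOffload or the real model):

from dataclasses import dataclass, field
from enum import Enum

class OffloadType(Enum):
    INPUTS = "inputs"
    OUTPUTS = "outputs"
    PROCESS_DATA = "process_data"

@dataclass
class Offload:
    type_: OffloadType
    file_id: str | None = None

@dataclass
class Execution:
    offload_data: list[Offload] = field(default_factory=list)

    @property
    def process_data_truncated(self) -> bool:
        # Truncated iff a PROCESS_DATA row with a backing file exists.
        return any(o.type_ is OffloadType.PROCESS_DATA and o.file_id for o in self.offload_data)

assert Execution([Offload(OffloadType.PROCESS_DATA, "process-data-file-id")]).process_data_truncated is True
assert Execution().process_data_truncated is False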

+ 102
- 91
api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py

import json import json
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import Any
from unittest.mock import MagicMock, Mock, patch from unittest.mock import MagicMock, Mock, patch


import pytest import pytest
_InputsOutputsTruncationResult, _InputsOutputsTruncationResult,
) )
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom
from models.model import UploadFile from models.model import UploadFile
from models.workflow import WorkflowNodeExecutionOffload from models.workflow import WorkflowNodeExecutionOffload
"""Create a repository instance for testing.""" """Create a repository instance for testing."""
mock_account = self.create_mock_account() mock_account = self.create_mock_account()
mock_session_factory = self.create_mock_session_factory() mock_session_factory = self.create_mock_session_factory()
repository = SQLAlchemyWorkflowNodeExecutionRepository( repository = SQLAlchemyWorkflowNodeExecutionRepository(
session_factory=mock_session_factory, session_factory=mock_session_factory,
user=mock_account, user=mock_account,
app_id="test-app-id", app_id="test-app-id",
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
) )
if mock_file_service: if mock_file_service:
repository._file_service = mock_file_service repository._file_service = mock_file_service
return repository return repository


def create_workflow_node_execution( def create_workflow_node_execution(
self, self,
process_data: dict[str, any] | None = None,
process_data: dict[str, Any] | None = None,
execution_id: str = "test-execution-id", execution_id: str = "test-execution-id",
) -> WorkflowNodeExecution: ) -> WorkflowNodeExecution:
"""Create a WorkflowNodeExecution instance for testing.""" """Create a WorkflowNodeExecution instance for testing."""
created_at=datetime.now(), created_at=datetime.now(),
) )


@patch('core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config')
@patch("core.repositories.sqlalchemy_workflow_node_execution_repository.dify_config")
def test_to_db_model_with_small_process_data(self, mock_config): def test_to_db_model_with_small_process_data(self, mock_config):
"""Test _to_db_model with small process_data that doesn't need truncation.""" """Test _to_db_model with small process_data that doesn't need truncation."""
mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000 mock_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE = 1000
mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100 mock_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH = 100
mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500 mock_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH = 500
repository = self.create_repository() repository = self.create_repository()
small_process_data = {"small": "data", "count": 5} small_process_data = {"small": "data", "count": 5}
execution = self.create_workflow_node_execution(process_data=small_process_data) execution = self.create_workflow_node_execution(process_data=small_process_data)
with patch.object(repository, '_truncate_and_upload', return_value=None) as mock_truncate:
with patch.object(repository, "_truncate_and_upload", return_value=None) as mock_truncate:
db_model = repository._to_db_model(execution) db_model = repository._to_db_model(execution)
# Should try to truncate but return None (no truncation needed) # Should try to truncate but return None (no truncation needed)
mock_truncate.assert_called_once_with(
small_process_data,
execution.id,
"_process_data"
)
mock_truncate.assert_called_once_with(small_process_data, execution.id, "_process_data")

# Process data should be stored directly in database # Process data should be stored directly in database
assert db_model.process_data is not None assert db_model.process_data is not None
stored_data = json.loads(db_model.process_data) stored_data = json.loads(db_model.process_data)
assert stored_data == small_process_data assert stored_data == small_process_data
# No offload data should be created for process_data # No offload data should be created for process_data
assert db_model.offload_data is None assert db_model.offload_data is None


def test_to_db_model_with_large_process_data(self): def test_to_db_model_with_large_process_data(self):
"""Test _to_db_model with large process_data that needs truncation.""" """Test _to_db_model with large process_data that needs truncation."""
repository = self.create_repository() repository = self.create_repository()
# Create large process_data that would need truncation # Create large process_data that would need truncation
large_process_data = { large_process_data = {
"large_field": "x" * 10000, # Very large string "large_field": "x" * 10000, # Very large string
"metadata": {"type": "processing", "timestamp": 1234567890}
"metadata": {"type": "processing", "timestamp": 1234567890},
} }
# Mock truncation result # Mock truncation result
truncated_data = {
"large_field": "[TRUNCATED]",
"metadata": {"type": "processing", "timestamp": 1234567890}
}
truncated_data = {"large_field": "[TRUNCATED]", "metadata": {"type": "processing", "timestamp": 1234567890}}

mock_upload_file = Mock(spec=UploadFile) mock_upload_file = Mock(spec=UploadFile)
mock_upload_file.id = "mock-file-id" mock_upload_file.id = "mock-file-id"

mock_offload = Mock(spec=WorkflowNodeExecutionOffload)
truncation_result = _InputsOutputsTruncationResult( truncation_result = _InputsOutputsTruncationResult(
truncated_value=truncated_data,
file=mock_upload_file
truncated_value=truncated_data, file=mock_upload_file, offload=mock_offload
) )
execution = self.create_workflow_node_execution(process_data=large_process_data) execution = self.create_workflow_node_execution(process_data=large_process_data)
with patch.object(repository, '_truncate_and_upload', return_value=truncation_result) as mock_truncate:
with patch.object(repository, "_truncate_and_upload", return_value=truncation_result) as mock_truncate:
db_model = repository._to_db_model(execution) db_model = repository._to_db_model(execution)
# Should call truncate with correct parameters # Should call truncate with correct parameters
mock_truncate.assert_called_once_with(
large_process_data,
execution.id,
"_process_data"
)
mock_truncate.assert_called_once_with(large_process_data, execution.id, "_process_data")

# Truncated data should be stored in database # Truncated data should be stored in database
assert db_model.process_data is not None assert db_model.process_data is not None
stored_data = json.loads(db_model.process_data) stored_data = json.loads(db_model.process_data)
assert stored_data == truncated_data assert stored_data == truncated_data
# Domain model should have truncated data set # Domain model should have truncated data set
assert execution.process_data_truncated is True assert execution.process_data_truncated is True
assert execution.get_truncated_process_data() == truncated_data assert execution.get_truncated_process_data() == truncated_data
# Offload data should be created # Offload data should be created
assert db_model.offload_data is not None assert db_model.offload_data is not None
assert db_model.offload_data.process_data_file == mock_upload_file
assert db_model.offload_data.process_data_file_id == "mock-file-id"
assert len(db_model.offload_data) > 0
# Find the process_data offload entry
process_data_offload = next(
(item for item in db_model.offload_data if hasattr(item, "file_id") and item.file_id == "mock-file-id"),
None,
)
assert process_data_offload is not None


def test_to_db_model_with_none_process_data(self): def test_to_db_model_with_none_process_data(self):
"""Test _to_db_model with None process_data.""" """Test _to_db_model with None process_data."""
repository = self.create_repository() repository = self.create_repository()
execution = self.create_workflow_node_execution(process_data=None) execution = self.create_workflow_node_execution(process_data=None)
with patch.object(repository, '_truncate_and_upload') as mock_truncate:
with patch.object(repository, "_truncate_and_upload") as mock_truncate:
db_model = repository._to_db_model(execution) db_model = repository._to_db_model(execution)
# Should not call truncate for None data # Should not call truncate for None data
mock_truncate.assert_not_called() mock_truncate.assert_not_called()
# Process data should be None # Process data should be None
assert db_model.process_data is None assert db_model.process_data is None
# No offload data should be created # No offload data should be created
assert db_model.offload_data is None
assert db_model.offload_data == []


def test_to_domain_model_with_offloaded_process_data(self): def test_to_domain_model_with_offloaded_process_data(self):
"""Test _to_domain_model with offloaded process_data.""" """Test _to_domain_model with offloaded process_data."""
repository = self.create_repository() repository = self.create_repository()
# Create mock database model with offload data # Create mock database model with offload data
db_model = Mock(spec=WorkflowNodeExecutionModel) db_model = Mock(spec=WorkflowNodeExecutionModel)
db_model.id = "test-execution-id" db_model.id = "test-execution-id"
db_model.elapsed_time = 1.5 db_model.elapsed_time = 1.5
db_model.created_at = datetime.now() db_model.created_at = datetime.now()
db_model.finished_at = None db_model.finished_at = None
# Mock truncated process_data from database # Mock truncated process_data from database
truncated_process_data = {"large_field": "[TRUNCATED]", "metadata": "info"} truncated_process_data = {"large_field": "[TRUNCATED]", "metadata": "info"}
db_model.process_data_dict = truncated_process_data db_model.process_data_dict = truncated_process_data
db_model.inputs_dict = None db_model.inputs_dict = None
db_model.outputs_dict = None db_model.outputs_dict = None
db_model.execution_metadata_dict = {} db_model.execution_metadata_dict = {}
# Mock offload data with process_data file # Mock offload data with process_data file
mock_offload_data = Mock(spec=WorkflowNodeExecutionOffload) mock_offload_data = Mock(spec=WorkflowNodeExecutionOffload)
mock_offload_data.inputs_file_id = None mock_offload_data.inputs_file_id = None
mock_offload_data.outputs_file_id = None mock_offload_data.outputs_file_id = None
mock_offload_data.outputs_file = None mock_offload_data.outputs_file = None
mock_offload_data.process_data_file_id = "process-data-file-id" mock_offload_data.process_data_file_id = "process-data-file-id"
mock_process_data_file = Mock(spec=UploadFile) mock_process_data_file = Mock(spec=UploadFile)
mock_offload_data.process_data_file = mock_process_data_file mock_offload_data.process_data_file = mock_process_data_file
db_model.offload_data = mock_offload_data
db_model.offload_data = [mock_offload_data]
# Mock the file loading # Mock the file loading
original_process_data = {
"large_field": "x" * 10000,
"metadata": "info"
}
with patch.object(repository, '_load_file', return_value=original_process_data) as mock_load:
original_process_data = {"large_field": "x" * 10000, "metadata": "info"}

with patch.object(repository, "_load_file", return_value=original_process_data) as mock_load:
domain_model = repository._to_domain_model(db_model) domain_model = repository._to_domain_model(db_model)

# Should load the file # Should load the file
mock_load.assert_called_once_with(mock_process_data_file) mock_load.assert_called_once_with(mock_process_data_file)
# Domain model should have original data # Domain model should have original data
assert domain_model.process_data == original_process_data assert domain_model.process_data == original_process_data
# Domain model should have truncated data set # Domain model should have truncated data set
assert domain_model.process_data_truncated is True assert domain_model.process_data_truncated is True
assert domain_model.get_truncated_process_data() == truncated_process_data assert domain_model.get_truncated_process_data() == truncated_process_data
def test_to_domain_model_without_offload_data(self): def test_to_domain_model_without_offload_data(self):
"""Test _to_domain_model without offload data.""" """Test _to_domain_model without offload data."""
repository = self.create_repository() repository = self.create_repository()
# Create mock database model without offload data # Create mock database model without offload data
db_model = Mock(spec=WorkflowNodeExecutionModel) db_model = Mock(spec=WorkflowNodeExecutionModel)
db_model.id = "test-execution-id" db_model.id = "test-execution-id"
db_model.elapsed_time = 1.5 db_model.elapsed_time = 1.5
db_model.created_at = datetime.now() db_model.created_at = datetime.now()
db_model.finished_at = None db_model.finished_at = None
process_data = {"normal": "data"} process_data = {"normal": "data"}
db_model.process_data_dict = process_data db_model.process_data_dict = process_data
db_model.inputs_dict = None db_model.inputs_dict = None
db_model.outputs_dict = None db_model.outputs_dict = None
db_model.execution_metadata_dict = {} db_model.execution_metadata_dict = {}
db_model.offload_data = None db_model.offload_data = None
domain_model = repository._to_domain_model(db_model) domain_model = repository._to_domain_model(db_model)
# Domain model should have the data from database # Domain model should have the data from database
assert domain_model.process_data == process_data assert domain_model.process_data == process_data
# Should not be truncated # Should not be truncated
assert domain_model.process_data_truncated is False assert domain_model.process_data_truncated is False
assert domain_model.get_truncated_process_data() is None assert domain_model.get_truncated_process_data() is None
@dataclass @dataclass
class TruncationScenario: class TruncationScenario:
"""Test scenario for truncation functionality.""" """Test scenario for truncation functionality."""

name: str name: str
process_data: dict[str, any] | None
process_data: dict[str, Any] | None
should_truncate: bool should_truncate: bool
expected_truncated: bool = False expected_truncated: bool = False




class TestProcessDataTruncationScenarios: class TestProcessDataTruncationScenarios:
"""Test various scenarios for process_data truncation.""" """Test various scenarios for process_data truncation."""
def get_truncation_scenarios(self) -> list[TruncationScenario]: def get_truncation_scenarios(self) -> list[TruncationScenario]:
"""Create test scenarios for truncation.""" """Create test scenarios for truncation."""
return [ return [
should_truncate=False, should_truncate=False,
), ),
] ]
@pytest.mark.parametrize("scenario", [
scenario for scenario in get_truncation_scenarios(None)
], ids=[scenario.name for scenario in get_truncation_scenarios(None)])

@pytest.mark.parametrize(
"scenario",
[
TruncationScenario("none_data", None, False, False),
TruncationScenario("small_data", {"small": "data"}, False, False),
TruncationScenario("large_data", {"large": "x" * 10000}, True, True),
TruncationScenario("empty_data", {}, False, False),
],
ids=["none_data", "small_data", "large_data", "empty_data"],
)
def test_process_data_truncation_scenarios(self, scenario: TruncationScenario): def test_process_data_truncation_scenarios(self, scenario: TruncationScenario):
"""Test various process_data truncation scenarios.""" """Test various process_data truncation scenarios."""
repository = SQLAlchemyWorkflowNodeExecutionRepository( repository = SQLAlchemyWorkflowNodeExecutionRepository(
app_id="test-app", app_id="test-app",
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
) )
execution = WorkflowNodeExecution( execution = WorkflowNodeExecution(
id="test-execution-id", id="test-execution-id",
workflow_id="test-workflow-id", workflow_id="test-workflow-id",
process_data=scenario.process_data, process_data=scenario.process_data,
created_at=datetime.now(), created_at=datetime.now(),
) )
# Mock truncation behavior # Mock truncation behavior
if scenario.should_truncate: if scenario.should_truncate:
truncated_data = {"truncated": True} truncated_data = {"truncated": True}
mock_file = Mock(spec=UploadFile, id="file-id") mock_file = Mock(spec=UploadFile, id="file-id")
mock_offload = Mock(spec=WorkflowNodeExecutionOffload)
truncation_result = _InputsOutputsTruncationResult( truncation_result = _InputsOutputsTruncationResult(
truncated_value=truncated_data,
file=mock_file
truncated_value=truncated_data, file=mock_file, offload=mock_offload
) )
with patch.object(repository, '_truncate_and_upload', return_value=truncation_result):
with patch.object(repository, "_truncate_and_upload", return_value=truncation_result):
db_model = repository._to_db_model(execution) db_model = repository._to_db_model(execution)
# Should create offload data # Should create offload data
assert db_model.offload_data is not None assert db_model.offload_data is not None
assert db_model.offload_data.process_data_file_id == "file-id"
assert len(db_model.offload_data) > 0
# Find the process_data offload entry
process_data_offload = next(
(item for item in db_model.offload_data if hasattr(item, "file_id") and item.file_id == "file-id"),
None,
)
assert process_data_offload is not None
assert execution.process_data_truncated == scenario.expected_truncated assert execution.process_data_truncated == scenario.expected_truncated
else: else:
with patch.object(repository, '_truncate_and_upload', return_value=None):
with patch.object(repository, "_truncate_and_upload", return_value=None):
db_model = repository._to_db_model(execution) db_model = repository._to_db_model(execution)
# Should not create offload data or set truncation # Should not create offload data or set truncation
if scenario.process_data is None: if scenario.process_data is None:
assert db_model.offload_data is None
assert db_model.offload_data == []
assert db_model.process_data is None assert db_model.process_data is None
else: else:
# For small data, might have offload_data from other fields but not process_data # For small data, might have offload_data from other fields but not process_data
if db_model.offload_data: if db_model.offload_data:
assert db_model.offload_data.process_data_file_id is None
assert db_model.offload_data.process_data_file is None
assert execution.process_data_truncated is False
# Check that no process_data offload entries exist
process_data_offloads = [
item
for item in db_model.offload_data
if hasattr(item, "type_") and item.type_.value == "process_data"
]
assert len(process_data_offloads) == 0

assert execution.process_data_truncated is False
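With offload_data as a list, the updated assertions locate the relevant row with a next()-with-default lookup; the idiom in isolation, with plain dicts standing in for offload rows:

offload_entries = [
    {"type": "inputs", "file_id": "inputs-file"},
    {"type": "process_data", "file_id": "file-id"},
]
# Returns the first matching entry, or None when nothing matches.
match = next((e for e in offload_entries if e["file_id"] == "file-id"), None)
assert match is not None and match["type"] == "process_data"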

+ 51
- 170
api/tests/unit_tests/services/test_variable_truncator.py

StringSegment, StringSegment,
) )
from services.variable_truncator import ( from services.variable_truncator import (
ARRAY_CHAR_LIMIT,
LARGE_VARIABLE_THRESHOLD,
OBJECT_CHAR_LIMIT,
MaxDepthExceededError, MaxDepthExceededError,
TruncationResult, TruncationResult,
UnknownTypeError, UnknownTypeError,
assert VariableTruncator.calculate_json_size("") == 2 # Just quotes assert VariableTruncator.calculate_json_size("") == 2 # Just quotes


# Unicode string # Unicode string
unicode_text = "你好"
expected_size = len(unicode_text.encode("utf-8")) + 2
assert VariableTruncator.calculate_json_size(unicode_text) == expected_size
assert VariableTruncator.calculate_json_size("你好") == 4


def test_number_size_calculation(self, truncator): def test_number_size_calculation(self, truncator):
"""Test JSON size calculation for numbers.""" """Test JSON size calculation for numbers."""
# Create deeply nested structure # Create deeply nested structure
nested: dict[str, Any] = {"level": 0} nested: dict[str, Any] = {"level": 0}
current = nested current = nested
for i in range(25): # Create deep nesting
for i in range(105): # Create deep nesting
current["next"] = {"level": i + 1} current["next"] = {"level": i + 1}
current = current["next"] current = current["next"]






class TestStringTruncation: class TestStringTruncation:
LENGTH_LIMIT = 10
"""Test string truncation functionality.""" """Test string truncation functionality."""


@pytest.fixture @pytest.fixture
def test_short_string_no_truncation(self, small_truncator): def test_short_string_no_truncation(self, small_truncator):
"""Test that short strings are not truncated.""" """Test that short strings are not truncated."""
short_str = "hello" short_str = "hello"
result, was_truncated = small_truncator._truncate_string(short_str)
assert result == short_str
assert was_truncated is False
result = small_truncator._truncate_string(short_str, self.LENGTH_LIMIT)
assert result.value == short_str
assert result.truncated is False
assert result.value_size == VariableTruncator.calculate_json_size(short_str)


def test_long_string_truncation(self, small_truncator: VariableTruncator): def test_long_string_truncation(self, small_truncator: VariableTruncator):
"""Test that long strings are truncated with ellipsis.""" """Test that long strings are truncated with ellipsis."""
long_str = "this is a very long string that exceeds the limit" long_str = "this is a very long string that exceeds the limit"
result, was_truncated = small_truncator._truncate_string(long_str)
result = small_truncator._truncate_string(long_str, self.LENGTH_LIMIT)


assert was_truncated is True
assert result == long_str[:7] + "..."
assert len(result) == 10 # 10 chars + "..."
assert result.truncated is True
assert result.value == long_str[:5] + "..."
assert result.value_size == 10 # 10 chars + "..."


def test_exact_limit_string(self, small_truncator):
def test_exact_limit_string(self, small_truncator: VariableTruncator):
"""Test string exactly at limit.""" """Test string exactly at limit."""
exact_str = "1234567890" # Exactly 10 chars exact_str = "1234567890" # Exactly 10 chars
result, was_truncated = small_truncator._truncate_string(exact_str)
assert result == exact_str
assert was_truncated is False
result = small_truncator._truncate_string(exact_str, self.LENGTH_LIMIT)
assert result.value == "12345..."
assert result.truncated is True
assert result.value_size == 10
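The string-truncation assertions above expect a result object carrying value, truncated, and value_size; a simplified stand-in consistent with those expectations (not the real VariableTruncator._truncate_string):

from dataclasses import dataclass

@dataclass
class StringTruncationResult:
    value: str
    truncated: bool
    value_size: int

def truncate_string(text: str, limit: int) -> StringTruncationResult:
    # JSON size of a string: its character count plus the two surrounding quotes.
    size = len(text) + 2
    if size <= limit:
        return StringTruncationResult(text, False, size)
    keep = max(limit - 5, 0)  # reserve the two quotes plus the "..." marker
    clipped = text[:keep] + "..."
    return StringTruncationResult(clipped, True, len(clipped) + 2)

result = truncate_string("this is a very long string that exceeds the limit", 10)
assert result.truncated is True and result.value == "this ..." and result.value_size == 10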




class TestArrayTruncation: class TestArrayTruncation:
def small_truncator(self): def small_truncator(self):
return VariableTruncator(array_element_limit=3, max_size_bytes=100) return VariableTruncator(array_element_limit=3, max_size_bytes=100)


def test_small_array_no_truncation(self, small_truncator):
def test_small_array_no_truncation(self, small_truncator: VariableTruncator):
"""Test that small arrays are not truncated.""" """Test that small arrays are not truncated."""
small_array = [1, 2] small_array = [1, 2]
result, was_truncated = small_truncator._truncate_array(small_array, 1000)
assert result == small_array
assert was_truncated is False
result = small_truncator._truncate_array(small_array, 1000)
assert result.value == small_array
assert result.truncated is False


def test_array_element_limit_truncation(self, small_truncator):
def test_array_element_limit_truncation(self, small_truncator: VariableTruncator):
"""Test that arrays over element limit are truncated.""" """Test that arrays over element limit are truncated."""
large_array = [1, 2, 3, 4, 5, 6] # Exceeds limit of 3 large_array = [1, 2, 3, 4, 5, 6] # Exceeds limit of 3
result, was_truncated = small_truncator._truncate_array(large_array, 1000)
result = small_truncator._truncate_array(large_array, 1000)


assert was_truncated is True
assert len(result) == 3
assert result == [1, 2, 3]
assert result.truncated is True
assert result.value == [1, 2, 3]


def test_array_size_budget_truncation(self, small_truncator):
def test_array_size_budget_truncation(self, small_truncator: VariableTruncator):
"""Test array truncation due to size budget constraints.""" """Test array truncation due to size budget constraints."""
# Create array with strings that will exceed size budget # Create array with strings that will exceed size budget
large_strings = ["very long string " * 5, "another long string " * 5] large_strings = ["very long string " * 5, "another long string " * 5]
result, was_truncated = small_truncator._truncate_array(large_strings, 50)
result = small_truncator._truncate_array(large_strings, 50)


assert was_truncated is True
assert result.truncated is True
# Should have truncated the strings within the array # Should have truncated the strings within the array
for item in result:
for item in result.value:
assert isinstance(item, str) assert isinstance(item, str)
print(result)
assert len(_compact_json_dumps(result).encode()) <= 50
assert VariableTruncator.calculate_json_size(result.value) <= 50


def test_array_with_nested_objects(self, small_truncator): def test_array_with_nested_objects(self, small_truncator):
"""Test array truncation with nested objects.""" """Test array truncation with nested objects."""
{"name": "item2", "data": "more data"}, {"name": "item2", "data": "more data"},
{"name": "item3", "data": "even more data"}, {"name": "item3", "data": "even more data"},
] ]
result, was_truncated = small_truncator._truncate_array(nested_array, 80)
result = small_truncator._truncate_array(nested_array, 30)


assert isinstance(result, list)
assert len(result) <= 3
# Should have processed nested objects appropriately
assert isinstance(result.value, list)
assert len(result.value) <= 3
for item in result.value:
assert isinstance(item, dict)




class TestObjectTruncation: class TestObjectTruncation:
def test_small_object_no_truncation(self, small_truncator): def test_small_object_no_truncation(self, small_truncator):
"""Test that small objects are not truncated.""" """Test that small objects are not truncated."""
small_obj = {"a": 1, "b": 2} small_obj = {"a": 1, "b": 2}
result, was_truncated = small_truncator._truncate_object(small_obj, 1000)
assert result == small_obj
assert was_truncated is False
result = small_truncator._truncate_object(small_obj, 1000)
assert result.value == small_obj
assert result.truncated is False


def test_empty_object_no_truncation(self, small_truncator): def test_empty_object_no_truncation(self, small_truncator):
"""Test that empty objects are not truncated.""" """Test that empty objects are not truncated."""
empty_obj = {} empty_obj = {}
result, was_truncated = small_truncator._truncate_object(empty_obj, 100)
assert result == empty_obj
assert was_truncated is False
result = small_truncator._truncate_object(empty_obj, 100)
assert result.value == empty_obj
assert result.truncated is False


def test_object_value_truncation(self, small_truncator): def test_object_value_truncation(self, small_truncator):
"""Test object truncation where values are truncated to fit budget.""" """Test object truncation where values are truncated to fit budget."""
"key2": "another long string " * 10, "key2": "another long string " * 10,
"key3": "third long string " * 10, "key3": "third long string " * 10,
} }
result, was_truncated = small_truncator._truncate_object(obj_with_long_values, 80)
result = small_truncator._truncate_object(obj_with_long_values, 80)


assert was_truncated is True
assert isinstance(result, dict)
assert result.truncated is True
assert isinstance(result.value, dict)


# Keys should be preserved (deterministic order due to sorting)
if result: # Only check if result is not empty
assert list(result.keys()) == sorted(result.keys())
assert set(result.value.keys()).issubset(obj_with_long_values.keys())


# Values should be truncated if they exist # Values should be truncated if they exist
for key, value in result.items():
for key, value in result.value.items():
if isinstance(value, str): if isinstance(value, str):
original_value = obj_with_long_values[key] original_value = obj_with_long_values[key]
# Value should be same or smaller # Value should be same or smaller
def test_object_key_dropping(self, small_truncator): def test_object_key_dropping(self, small_truncator):
"""Test object truncation where keys are dropped due to size constraints.""" """Test object truncation where keys are dropped due to size constraints."""
large_obj = {f"key{i:02d}": f"value{i}" for i in range(20)} large_obj = {f"key{i:02d}": f"value{i}" for i in range(20)}
result, was_truncated = small_truncator._truncate_object(large_obj, 50)
result = small_truncator._truncate_object(large_obj, 50)


assert was_truncated is True
assert len(result) < len(large_obj)
assert result.truncated is True
assert len(result.value) < len(large_obj)


# Should maintain sorted key order # Should maintain sorted key order
result_keys = list(result.keys())
result_keys = list(result.value.keys())
assert result_keys == sorted(result_keys) assert result_keys == sorted(result_keys)
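
The key-dropping test only pins two properties of the new behaviour: fewer keys than the input and sorted key order. A greedy sketch that satisfies both, with per-entry cost measured the same way as above (illustrative only, not the code under test):

import json


def drop_keys_to_budget(obj: dict, budget: int) -> dict:
    kept: dict = {}
    used = 2  # rough allowance for the surrounding braces
    for key in sorted(obj):  # deterministic order, as the assertion expects
        entry = json.dumps({key: obj[key]}, separators=(",", ":"))
        cost = len(entry.encode("utf-8")) - 2 + 1  # strip braces, add a comma
        if used + cost > budget:
            break
        kept[key] = obj[key]
        used += cost
    return kept


small = drop_keys_to_budget({f"key{i:02d}": f"value{i}" for i in range(20)}, 50)
assert len(small) < 20 and list(small.keys()) == sorted(small.keys())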


def test_object_with_nested_structures(self, small_truncator): def test_object_with_nested_structures(self, small_truncator):
"""Test object truncation with nested arrays and objects.""" """Test object truncation with nested arrays and objects."""
nested_obj = {"simple": "value", "array": [1, 2, 3, 4, 5], "nested": {"inner": "data", "more": ["a", "b", "c"]}} nested_obj = {"simple": "value", "array": [1, 2, 3, 4, 5], "nested": {"inner": "data", "more": ["a", "b", "c"]}}
result, was_truncated = small_truncator._truncate_object(nested_obj, 60)
result = small_truncator._truncate_object(nested_obj, 60)


assert isinstance(result, dict)
# Should handle nested structures appropriately
assert isinstance(result.value, dict)




class TestSegmentBasedTruncation: class TestSegmentBasedTruncation:
assert len(result.result.value) <= 1000 # Much smaller than original assert len(result.result.value) <= 1000 # Much smaller than original




class TestTruncationHelperMethods:
"""Test helper methods used in truncation."""

@pytest.fixture
def truncator(self):
return VariableTruncator()

def test_truncate_item_to_budget_string(self, truncator):
"""Test _truncate_item_to_budget with string input."""
item = "this is a long string"
budget = 15
result, was_truncated = truncator._truncate_item_to_budget(item, budget)

assert isinstance(result, str)
# Should be truncated to fit budget
if was_truncated:
assert len(result) <= budget
assert result.endswith("...")

def test_truncate_item_to_budget_dict(self, truncator):
"""Test _truncate_item_to_budget with dict input."""
item = {"key": "value", "longer": "longer value"}
budget = 30
result, was_truncated = truncator._truncate_item_to_budget(item, budget)

assert isinstance(result, dict)
# Should apply object truncation logic

def test_truncate_item_to_budget_list(self, truncator):
"""Test _truncate_item_to_budget with list input."""
item = [1, 2, 3, 4, 5]
budget = 15
result, was_truncated = truncator._truncate_item_to_budget(item, budget)

assert isinstance(result, list)
# Should apply array truncation logic

def test_truncate_item_to_budget_other_types(self, truncator):
"""Test _truncate_item_to_budget with other types."""
# Small number that fits
result, was_truncated = truncator._truncate_item_to_budget(123, 10)
assert result == 123
assert was_truncated is False

# Large number that might not fit - should convert to string if needed
large_num = 123456789012345
result, was_truncated = truncator._truncate_item_to_budget(large_num, 5)
if was_truncated:
assert isinstance(result, str)

def test_truncate_value_to_budget_string(self, truncator):
"""Test _truncate_value_to_budget with string input."""
value = "x" * 100
budget = 20
result, was_truncated = truncator._truncate_value_to_budget(value, budget)

assert isinstance(result, str)
if was_truncated:
assert len(result) <= 20 # Should respect budget
assert result.endswith("...")

def test_truncate_value_to_budget_respects_object_char_limit(self, truncator):
"""Test that _truncate_value_to_budget respects OBJECT_CHAR_LIMIT."""
# Even with large budget, should respect OBJECT_CHAR_LIMIT
large_string = "x" * 10000
large_budget = 20000
result, was_truncated = truncator._truncate_value_to_budget(large_string, large_budget)

if was_truncated:
assert len(result) <= OBJECT_CHAR_LIMIT + 3 # +3 for "..."


class TestEdgeCases: class TestEdgeCases:
"""Test edge cases and error conditions.""" """Test edge cases and error conditions."""


if isinstance(result.result, ObjectSegment): if isinstance(result.result, ObjectSegment):
result_size = truncator.calculate_json_size(result.result.value) result_size = truncator.calculate_json_size(result.result.value)
assert result_size <= original_size assert result_size <= original_size


class TestConstantsAndConfiguration:
"""Test behavior with different configuration constants."""

def test_large_variable_threshold_constant(self):
"""Test that LARGE_VARIABLE_THRESHOLD constant is properly used."""
truncator = VariableTruncator()
assert truncator._max_size_bytes == LARGE_VARIABLE_THRESHOLD
assert LARGE_VARIABLE_THRESHOLD == 10 * 1024 # 10KB

def test_string_truncation_limit_constant(self):
"""Test that STRING_TRUNCATION_LIMIT constant is properly used."""
truncator = VariableTruncator()
assert truncator._string_length_limit == 5000

def test_array_char_limit_constant(self):
"""Test that ARRAY_CHAR_LIMIT is used in array item truncation."""
truncator = VariableTruncator()

# Test that ARRAY_CHAR_LIMIT is respected in array item truncation
long_string = "x" * 2000
budget = 5000 # Large budget

result, was_truncated = truncator._truncate_item_to_budget(long_string, budget)
if was_truncated:
# Should not exceed ARRAY_CHAR_LIMIT even with large budget
assert len(result) <= ARRAY_CHAR_LIMIT + 3 # +3 for "..."

def test_object_char_limit_constant(self):
"""Test that OBJECT_CHAR_LIMIT is used in object value truncation."""
truncator = VariableTruncator()

# Test that OBJECT_CHAR_LIMIT is respected in object value truncation
long_string = "x" * 8000
large_budget = 20000

result, was_truncated = truncator._truncate_value_to_budget(long_string, large_budget)
if was_truncated:
# Should not exceed OBJECT_CHAR_LIMIT even with large budget
assert len(result) <= OBJECT_CHAR_LIMIT + 3 # +3 for "..."
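
The removed constant tests above pinned the ellipsis behaviour: a string is cut to a per-container character limit and suffixed with a literal "...". A self-contained sketch of that contract (the limit value here is illustrative; the real ARRAY_CHAR_LIMIT / OBJECT_CHAR_LIMIT constants live in api/services/variable_truncator.py):

CHAR_LIMIT = 1000  # assumption: stand-in for the real per-container limit


def truncate_string(text: str, limit: int = CHAR_LIMIT) -> tuple[str, bool]:
    # Returns the (possibly shortened) text and whether anything was cut.
    if len(text) <= limit:
        return text, False
    return text[:limit] + "...", True


shortened, was_truncated = truncate_string("x" * 2000)
assert was_truncated and len(shortened) == CHAR_LIMIT + 3 and shortened.endswith("...")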

+ 33
- 35
api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py Прегледај датотеку

"""Simplified unit tests for DraftVarLoader focusing on core functionality.""" """Simplified unit tests for DraftVarLoader focusing on core functionality."""

import json import json
from unittest.mock import Mock, patch from unittest.mock import Mock, patch


def draft_var_loader(self, mock_engine): def draft_var_loader(self, mock_engine):
"""Create DraftVarLoader instance for testing.""" """Create DraftVarLoader instance for testing."""
return DraftVarLoader( return DraftVarLoader(
engine=mock_engine,
app_id="test-app-id",
tenant_id="test-tenant-id",
fallback_variables=[]
engine=mock_engine, app_id="test-app-id", tenant_id="test-tenant-id", fallback_variables=[]
) )


def test_load_offloaded_variable_string_type_unit(self, draft_var_loader): def test_load_offloaded_variable_string_type_unit(self, draft_var_loader):
assert variable.name == "test_variable" assert variable.name == "test_variable"
assert variable.description == "test description" assert variable.description == "test description"
assert variable.value == test_content assert variable.value == test_content
# Verify storage was called correctly # Verify storage was called correctly
mock_storage.load.assert_called_once_with("storage/key/test.txt") mock_storage.load.assert_called_once_with("storage/key/test.txt")


assert variable.name == "test_object" assert variable.name == "test_object"
assert variable.description == "test description" assert variable.description == "test description"
assert variable.value == test_object assert variable.value == test_object
# Verify method calls # Verify method calls
mock_storage.load.assert_called_once_with("storage/key/test.json") mock_storage.load.assert_called_once_with("storage/key/test.json")
mock_build_segment.assert_called_once_with(SegmentType.OBJECT, test_object) mock_build_segment.assert_called_once_with(SegmentType.OBJECT, test_object)


with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment:
from core.variables.segments import FloatSegment from core.variables.segments import FloatSegment

mock_segment = FloatSegment(value=test_number) mock_segment = FloatSegment(value=test_number)
mock_build_segment.return_value = mock_segment mock_build_segment.return_value = mock_segment


assert variable.id == "draft-var-id" assert variable.id == "draft-var-id"
assert variable.name == "test_number" assert variable.name == "test_number"
assert variable.description == "test number description" assert variable.description == "test number description"
# Verify method calls # Verify method calls
mock_storage.load.assert_called_once_with("storage/key/test_number.json") mock_storage.load.assert_called_once_with("storage/key/test_number.json")
mock_build_segment.assert_called_once_with(SegmentType.NUMBER, test_number) mock_build_segment.assert_called_once_with(SegmentType.NUMBER, test_number)


draft_var = Mock(spec=WorkflowDraftVariable) draft_var = Mock(spec=WorkflowDraftVariable)
draft_var.id = "draft-var-id" draft_var.id = "draft-var-id"
draft_var.node_id = "test-node-id"
draft_var.node_id = "test-node-id"
draft_var.name = "test_array" draft_var.name = "test_array"
draft_var.description = "test array description" draft_var.description = "test array description"
draft_var.get_selector.return_value = ["test-node-id", "test_array"] draft_var.get_selector.return_value = ["test-node-id", "test_array"]


with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment:
from core.variables.segments import ArrayAnySegment from core.variables.segments import ArrayAnySegment

mock_segment = ArrayAnySegment(value=test_array) mock_segment = ArrayAnySegment(value=test_array)
mock_build_segment.return_value = mock_segment mock_build_segment.return_value = mock_segment


assert variable.id == "draft-var-id" assert variable.id == "draft-var-id"
assert variable.name == "test_array" assert variable.name == "test_array"
assert variable.description == "test array description" assert variable.description == "test array description"
# Verify method calls # Verify method calls
mock_storage.load.assert_called_once_with("storage/key/test_array.json") mock_storage.load.assert_called_once_with("storage/key/test_array.json")
mock_build_segment.assert_called_once_with(SegmentType.ARRAY_ANY, test_array) mock_build_segment.assert_called_once_with(SegmentType.ARRAY_ANY, test_array)


def test_load_variables_with_offloaded_variables_unit(self, draft_var_loader): def test_load_variables_with_offloaded_variables_unit(self, draft_var_loader):
"""Test load_variables method with mix of regular and offloaded variables.""" """Test load_variables method with mix of regular and offloaded variables."""
selectors = [
["node1", "regular_var"],
["node2", "offloaded_var"]
]
selectors = [["node1", "regular_var"], ["node2", "offloaded_var"]]


# Mock regular variable # Mock regular variable
regular_draft_var = Mock(spec=WorkflowDraftVariable) regular_draft_var = Mock(spec=WorkflowDraftVariable)
regular_draft_var.is_truncated.return_value = False regular_draft_var.is_truncated.return_value = False
regular_draft_var.node_id = "node1" regular_draft_var.node_id = "node1"
regular_draft_var.name = "regular_var"
regular_draft_var.name = "regular_var"
regular_draft_var.get_value.return_value = StringSegment(value="regular_value") regular_draft_var.get_value.return_value = StringSegment(value="regular_value")
regular_draft_var.get_selector.return_value = ["node1", "regular_var"] regular_draft_var.get_selector.return_value = ["node1", "regular_var"]
regular_draft_var.id = "regular-var-id" regular_draft_var.id = "regular-var-id"
# Mock offloaded variable # Mock offloaded variable
upload_file = Mock(spec=UploadFile) upload_file = Mock(spec=UploadFile)
upload_file.key = "storage/key/offloaded.txt" upload_file.key = "storage/key/offloaded.txt"
variable_file = Mock(spec=WorkflowDraftVariableFile) variable_file = Mock(spec=WorkflowDraftVariableFile)
variable_file.value_type = SegmentType.STRING variable_file.value_type = SegmentType.STRING
variable_file.upload_file = upload_file variable_file.upload_file = upload_file
with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: with patch("services.workflow_draft_variable_service.Session") as mock_session_cls:
mock_session = Mock() mock_session = Mock()
mock_session_cls.return_value.__enter__.return_value = mock_session mock_session_cls.return_value.__enter__.return_value = mock_session
mock_service = Mock() mock_service = Mock()
mock_service.get_draft_variables_by_selectors.return_value = draft_vars mock_service.get_draft_variables_by_selectors.return_value = draft_vars
with patch("services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service):

with patch(
"services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service
):
with patch("services.workflow_draft_variable_service.StorageKeyLoader"): with patch("services.workflow_draft_variable_service.StorageKeyLoader"):
with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable:
# Mock regular variable creation # Mock regular variable creation
regular_variable = Mock() regular_variable = Mock()
regular_variable.selector = ["node1", "regular_var"] regular_variable.selector = ["node1", "regular_var"]
# Mock offloaded variable creation # Mock offloaded variable creation
offloaded_variable = Mock() offloaded_variable = Mock()
offloaded_variable.selector = ["node2", "offloaded_var"] offloaded_variable.selector = ["node2", "offloaded_var"]
mock_segment_to_variable.return_value = regular_variable mock_segment_to_variable.return_value = regular_variable
with patch("services.workflow_draft_variable_service.storage") as mock_storage: with patch("services.workflow_draft_variable_service.storage") as mock_storage:
mock_storage.load.return_value = b"offloaded_content" mock_storage.load.return_value = b"offloaded_content"
with patch.object(draft_var_loader, "_load_offloaded_variable") as mock_load_offloaded: with patch.object(draft_var_loader, "_load_offloaded_variable") as mock_load_offloaded:
mock_load_offloaded.return_value = (("node2", "offloaded_var"), offloaded_variable) mock_load_offloaded.return_value = (("node2", "offloaded_var"), offloaded_variable)
with patch("concurrent.futures.ThreadPoolExecutor") as mock_executor_cls: with patch("concurrent.futures.ThreadPoolExecutor") as mock_executor_cls:
mock_executor = Mock() mock_executor = Mock()
mock_executor_cls.return_value.__enter__.return_value = mock_executor mock_executor_cls.return_value.__enter__.return_value = mock_executor


# Verify results # Verify results
assert len(result) == 2 assert len(result) == 2
# Verify service method was called # Verify service method was called
mock_service.get_draft_variables_by_selectors.assert_called_once_with( mock_service.get_draft_variables_by_selectors.assert_called_once_with(
draft_var_loader._app_id, selectors draft_var_loader._app_id, selectors
) )
# Verify offloaded variable loading was called # Verify offloaded variable loading was called
mock_load_offloaded.assert_called_once_with(offloaded_draft_var) mock_load_offloaded.assert_called_once_with(offloaded_draft_var)


def test_load_variables_all_offloaded_variables_unit(self, draft_var_loader): def test_load_variables_all_offloaded_variables_unit(self, draft_var_loader):
"""Test load_variables method with only offloaded variables.""" """Test load_variables method with only offloaded variables."""
selectors = [
["node1", "offloaded_var1"],
["node2", "offloaded_var2"]
]
selectors = [["node1", "offloaded_var1"], ["node2", "offloaded_var2"]]


# Mock first offloaded variable # Mock first offloaded variable
offloaded_var1 = Mock(spec=WorkflowDraftVariable) offloaded_var1 = Mock(spec=WorkflowDraftVariable)
with patch("services.workflow_draft_variable_service.Session") as mock_session_cls: with patch("services.workflow_draft_variable_service.Session") as mock_session_cls:
mock_session = Mock() mock_session = Mock()
mock_session_cls.return_value.__enter__.return_value = mock_session mock_session_cls.return_value.__enter__.return_value = mock_session
mock_service = Mock() mock_service = Mock()
mock_service.get_draft_variables_by_selectors.return_value = draft_vars mock_service.get_draft_variables_by_selectors.return_value = draft_vars
with patch("services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service):

with patch(
"services.workflow_draft_variable_service.WorkflowDraftVariableService", return_value=mock_service
):
with patch("services.workflow_draft_variable_service.StorageKeyLoader"): with patch("services.workflow_draft_variable_service.StorageKeyLoader"):
with patch("services.workflow_draft_variable_service.ThreadPoolExecutor") as mock_executor_cls: with patch("services.workflow_draft_variable_service.ThreadPoolExecutor") as mock_executor_cls:
mock_executor = Mock() mock_executor = Mock()
mock_executor_cls.return_value.__enter__.return_value = mock_executor mock_executor_cls.return_value.__enter__.return_value = mock_executor
mock_executor.map.return_value = [ mock_executor.map.return_value = [
(("node1", "offloaded_var1"), Mock()), (("node1", "offloaded_var1"), Mock()),
(("node2", "offloaded_var2"), Mock())
(("node2", "offloaded_var2"), Mock()),
] ]


# Execute the method # Execute the method


# Verify results - since we have only offloaded variables, should have 2 results # Verify results - since we have only offloaded variables, should have 2 results
assert len(result) == 2 assert len(result) == 2
# Verify ThreadPoolExecutor was used # Verify ThreadPoolExecutor was used
mock_executor_cls.assert_called_once_with(max_workers=10) mock_executor_cls.assert_called_once_with(max_workers=10)
mock_executor.map.assert_called_once()
mock_executor.map.assert_called_once()
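
Both loader tests above mock ThreadPoolExecutor and have _load_offloaded_variable return a ((node_id, name), variable) pair, so the loader can merge offloaded results back by selector. A small, self-contained sketch of that fan-out pattern, using plain dicts as stand-ins for WorkflowDraftVariable (the selector shape and max_workers=10 come from the assertions above; everything else is illustrative):

from concurrent.futures import ThreadPoolExecutor


def load_offloaded(draft_var: dict) -> tuple[tuple[str, str], str]:
    # Returns ((node_id, name), loaded_value), matching the mocked return shape.
    return (draft_var["node_id"], draft_var["name"]), f"content for {draft_var['name']}"


offloaded = [
    {"node_id": "node1", "name": "offloaded_var1"},
    {"node_id": "node2", "name": "offloaded_var2"},
]

with ThreadPoolExecutor(max_workers=10) as executor:
    by_selector = dict(executor.map(load_offloaded, offloaded))

assert by_selector[("node2", "offloaded_var2")] == "content for offloaded_var2"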

+ 2
- 0
api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py Прегледај датотеку

from core.variables.types import SegmentType from core.variables.types import SegmentType
from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID
from core.workflow.enums import NodeType from core.workflow.enums import NodeType
from libs.uuid_utils import uuidv7
from models.account import Account from models.account import Account
from models.enums import DraftVariableType from models.enums import DraftVariableType
from models.workflow import ( from models.workflow import (
created_by="test_user_id", created_by="test_user_id",
environment_variables=[], environment_variables=[],
conversation_variables=[], conversation_variables=[],
rag_pipeline_variables=[],
) )
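
The two additions in this file are the uuidv7 import and the new rag_pipeline_variables=[] keyword in the Workflow fixture. The constructor call is only partially visible in this hunk, so it is not reproduced here; a minimal illustration of the import side, assuming uuidv7() can be called without arguments and yields unique, string-convertible ids (libs.uuid_utils itself is not shown in this diff):

from libs.uuid_utils import uuidv7

# Two consecutive ids should differ; uuidv7 ids are also time-ordered,
# which is a common reason to prefer them for fixture primary keys.
first, second = uuidv7(), uuidv7()
assert str(first) != str(second)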


def test_reset_conversation_variable(self, mock_session): def test_reset_conversation_variable(self, mock_session):
