| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- from collections.abc import Generator
- from dataclasses import dataclass, field
- from typing import TypeVar, Union
-
- from core.agent.entities import AgentInvokeMessage
- from core.tools.entities.tool_entities import ToolInvokeMessage
-
- MessageType = TypeVar("MessageType", bound=Union[ToolInvokeMessage, AgentInvokeMessage])
-
-
@dataclass
class FileChunk:
    """
    Accumulation buffer for one file that arrives as a sequence of streamed chunks.

    Only ``total_length`` is passed to the constructor; the other two fields
    are excluded from ``__init__`` and are set up automatically.
    """

    # Size, in bytes, used to pre-allocate ``data``.
    total_length: int
    # Running count of bytes stored so far; every new buffer starts at zero.
    bytes_written: int = field(init=False, default=0)
    # Backing storage; created in ``__post_init__`` once ``total_length`` is known.
    data: bytearray = field(init=False)

    def __post_init__(self) -> None:
        # bytearray(n) yields n zero bytes, so the buffer starts out fully sized.
        buffer_size = self.total_length
        self.data = bytearray(buffer_size)
-
-
def merge_blob_chunks(
    response: Generator[MessageType, None, None],
    max_file_size: int = 30 * 1024 * 1024,
    max_chunk_size: int = 8192,
) -> Generator[MessageType, None, None]:
    """
    Merge streaming blob chunks into complete blob messages.

    Messages whose type is BLOB_CHUNK are accumulated per chunk ID until a
    chunk flagged as the end arrives; at that point a single BLOB message of
    the same class as the incoming message is yielded in their place. Every
    other message is passed through unchanged and in order.

    Buffers that never receive an end chunk are discarded when the input
    stream is exhausted; nothing is yielded for them.

    Args:
        response: Generator yielding messages that may include blob chunks
        max_file_size: Maximum allowed file size in bytes (default: 30MB)
        max_chunk_size: Maximum allowed chunk size in bytes (default: 8KB)

    Yields:
        Messages from the response stream, with blob chunks merged into complete blobs

    Raises:
        ValueError: If file size exceeds max_file_size or chunk size exceeds max_chunk_size
    """
    files: dict[str, FileChunk] = {}

    for resp in response:
        # Anything that is not a blob chunk is forwarded untouched.
        if resp.type != ToolInvokeMessage.MessageType.BLOB_CHUNK:
            yield resp
            continue

        assert isinstance(resp.message, ToolInvokeMessage.BlobChunkMessage)
        # Get blob chunk information
        chunk_id = resp.message.id
        total_length = resp.message.total_length
        blob_data = resp.message.blob
        is_end = resp.message.end
        chunk_size = len(blob_data)

        # Initialize buffer for this file if it doesn't exist
        buffer = files.get(chunk_id)
        if buffer is None:
            # total_length is declared by the sender, so it must not drive the
            # allocation on its own: cap the pre-allocated size at
            # max_file_size. The slice assignment below grows the bytearray
            # when needed, and the size check keeps the payload within
            # max_file_size, so the cap never truncates accepted data.
            buffer = FileChunk(min(total_length, max_file_size))
            files[chunk_id] = buffer

        # Check if file is too large (before appending)
        if buffer.bytes_written + chunk_size > max_file_size:
            # Delete the file if it's too large
            del files[chunk_id]
            raise ValueError(f"File is too large which reached the limit of {max_file_size / 1024 / 1024}MB")

        # Check if single chunk is too large
        if chunk_size > max_chunk_size:
            raise ValueError(f"File chunk is too large which reached the limit of {max_chunk_size / 1024}KB")

        # Append the blob data to the buffer at the current write offset
        start = buffer.bytes_written
        buffer.data[start : start + chunk_size] = blob_data
        buffer.bytes_written = start + chunk_size

        # If this is the final chunk, yield a complete blob message
        if is_end:
            # Create the appropriate message type based on the response type
            message_class = type(resp)
            merged_message = message_class(
                type=ToolInvokeMessage.MessageType.BLOB,
                # Only the bytes actually written are emitted, not the whole
                # pre-allocated buffer.
                message=ToolInvokeMessage.BlobMessage(blob=buffer.data[: buffer.bytes_written]),
                meta=resp.meta,
            )
            yield merged_message
            # Clean up the buffer
            del files[chunk_id]
|