You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

chunk_merger.py 3.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. from collections.abc import Generator
  2. from dataclasses import dataclass, field
  3. from typing import TypeVar, Union
  4. from core.agent.entities import AgentInvokeMessage
  5. from core.tools.entities.tool_entities import ToolInvokeMessage
  6. MessageType = TypeVar("MessageType", bound=Union[ToolInvokeMessage, AgentInvokeMessage])
  7. @dataclass
  8. class FileChunk:
  9. """
  10. Buffer for accumulating file chunks during streaming.
  11. """
  12. total_length: int
  13. bytes_written: int = field(default=0, init=False)
  14. data: bytearray = field(init=False)
  15. def __post_init__(self) -> None:
  16. self.data = bytearray(self.total_length)
  17. def merge_blob_chunks(
  18. response: Generator[MessageType, None, None],
  19. max_file_size: int = 30 * 1024 * 1024,
  20. max_chunk_size: int = 8192,
  21. ) -> Generator[MessageType, None, None]:
  22. """
  23. Merge streaming blob chunks into complete blob messages.
  24. This function processes a stream of plugin invoke messages, accumulating
  25. BLOB_CHUNK messages by their ID until the final chunk is received,
  26. then yielding a single complete BLOB message.
  27. Args:
  28. response: Generator yielding messages that may include blob chunks
  29. max_file_size: Maximum allowed file size in bytes (default: 30MB)
  30. max_chunk_size: Maximum allowed chunk size in bytes (default: 8KB)
  31. Yields:
  32. Messages from the response stream, with blob chunks merged into complete blobs
  33. Raises:
  34. ValueError: If file size exceeds max_file_size or chunk size exceeds max_chunk_size
  35. """
  36. files: dict[str, FileChunk] = {}
  37. for resp in response:
  38. if resp.type == ToolInvokeMessage.MessageType.BLOB_CHUNK:
  39. assert isinstance(resp.message, ToolInvokeMessage.BlobChunkMessage)
  40. # Get blob chunk information
  41. chunk_id = resp.message.id
  42. total_length = resp.message.total_length
  43. blob_data = resp.message.blob
  44. is_end = resp.message.end
  45. # Initialize buffer for this file if it doesn't exist
  46. if chunk_id not in files:
  47. files[chunk_id] = FileChunk(total_length)
  48. # Check if file is too large (before appending)
  49. if files[chunk_id].bytes_written + len(blob_data) > max_file_size:
  50. # Delete the file if it's too large
  51. del files[chunk_id]
  52. raise ValueError(f"File is too large which reached the limit of {max_file_size / 1024 / 1024}MB")
  53. # Check if single chunk is too large
  54. if len(blob_data) > max_chunk_size:
  55. raise ValueError(f"File chunk is too large which reached the limit of {max_chunk_size / 1024}KB")
  56. # Append the blob data to the buffer
  57. files[chunk_id].data[files[chunk_id].bytes_written : files[chunk_id].bytes_written + len(blob_data)] = (
  58. blob_data
  59. )
  60. files[chunk_id].bytes_written += len(blob_data)
  61. # If this is the final chunk, yield a complete blob message
  62. if is_end:
  63. # Create the appropriate message type based on the response type
  64. message_class = type(resp)
  65. merged_message = message_class(
  66. type=ToolInvokeMessage.MessageType.BLOB,
  67. message=ToolInvokeMessage.BlobMessage(blob=files[chunk_id].data[: files[chunk_id].bytes_written]),
  68. meta=resp.meta,
  69. )
  70. yield merged_message
  71. # Clean up the buffer
  72. del files[chunk_id]
  73. else:
  74. yield resp