| 
                        123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 | 
                        - from abc import ABC, abstractmethod
 - from collections.abc import Sequence
 - from typing import Any, Optional
 - 
 - from pydantic import BaseModel
 - 
 - 
 - class ChildDocument(BaseModel):
 -     """Class for storing a piece of text and associated metadata."""
 - 
 -     page_content: str
 - 
 -     vector: Optional[list[float]] = None
 - 
 -     """Arbitrary metadata about the page content (e.g., source, relationships to other
 -         documents, etc.).
 -     """
 -     metadata: dict = {}
 - 
 - 
 - class Document(BaseModel):
 -     """Class for storing a piece of text and associated metadata."""
 - 
 -     page_content: str
 - 
 -     vector: Optional[list[float]] = None
 - 
 -     """Arbitrary metadata about the page content (e.g., source, relationships to other
 -         documents, etc.).
 -     """
 -     metadata: dict = {}
 - 
 -     provider: Optional[str] = "dify"
 - 
 -     children: Optional[list[ChildDocument]] = None
 - 
 - 
 - class BaseDocumentTransformer(ABC):
 -     """Abstract base class for document transformation systems.
 - 
 -     A document transformation system takes a sequence of Documents and returns a
 -     sequence of transformed Documents.
 - 
 -     Example:
 -         .. code-block:: python
 - 
 -             class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
 -                 model_config = ConfigDict(arbitrary_types_allowed=True)
 - 
 -                 embeddings: Embeddings
 -                 similarity_fn: Callable = cosine_similarity
 -                 similarity_threshold: float = 0.95
 - 
 -                 def transform_documents(
 -                     self, documents: Sequence[Document], **kwargs: Any
 -                 ) -> Sequence[Document]:
 -                     stateful_documents = get_stateful_documents(documents)
 -                     embedded_documents = _get_embeddings_from_stateful_docs(
 -                         self.embeddings, stateful_documents
 -                     )
 -                     included_idxs = _filter_similar_embeddings(
 -                         embedded_documents, self.similarity_fn, self.similarity_threshold
 -                     )
 -                     return [stateful_documents[i] for i in sorted(included_idxs)]
 - 
 -                 async def atransform_documents(
 -                     self, documents: Sequence[Document], **kwargs: Any
 -                 ) -> Sequence[Document]:
 -                     raise NotImplementedError
 - 
 -     """
 - 
 -     @abstractmethod
 -     def transform_documents(self, documents: Sequence[Document], **kwargs: Any) -> Sequence[Document]:
 -         """Transform a list of documents.
 - 
 -         Args:
 -             documents: A sequence of Documents to be transformed.
 - 
 -         Returns:
 -             A list of transformed Documents.
 -         """
 - 
 -     @abstractmethod
 -     async def atransform_documents(self, documents: Sequence[Document], **kwargs: Any) -> Sequence[Document]:
 -         """Asynchronously transform a list of documents.
 - 
 -         Args:
 -             documents: A sequence of Documents to be transformed.
 - 
 -         Returns:
 -             A list of transformed Documents.
 -         """
 
 
  |