| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 | 
							- from __future__ import annotations
 - from abc import abstractmethod, ABC
 - from typing import List, Any
 - 
 - from langchain.schema import Document, BaseRetriever
 - 
 - from models.dataset import Dataset
 - 
 - 
 - class BaseIndex(ABC):
 - 
 -     def __init__(self, dataset: Dataset):
 -         self.dataset = dataset
 - 
 -     @abstractmethod
 -     def create(self, texts: list[Document], **kwargs) -> BaseIndex:
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def add_texts(self, texts: list[Document], **kwargs):
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def text_exists(self, id: str) -> bool:
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def delete_by_ids(self, ids: list[str]) -> None:
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def delete_by_document_id(self, document_id: str):
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def get_retriever(self, **kwargs: Any) -> BaseRetriever:
 -         raise NotImplementedError
 - 
 -     @abstractmethod
 -     def search(
 -             self, query: str,
 -             **kwargs: Any
 -     ) -> List[Document]:
 -         raise NotImplementedError
 - 
 -     def delete(self) -> None:
 -         raise NotImplementedError
 - 
 -     def _filter_duplicate_texts(self, texts: list[Document]) -> list[Document]:
 -         for text in texts:
 -             doc_id = text.metadata['doc_id']
 -             exists_duplicate_node = self.text_exists(doc_id)
 -             if exists_duplicate_node:
 -                 texts.remove(text)
 - 
 -         return texts
 - 
 -     def _get_uuids(self, texts: list[Document]) -> list[str]:
 -         return [text.metadata['doc_id'] for text in texts]
 
 
  |