| 12345678910111213141516171819202122232425262728293031323334 | 
							- import logging
 - 
 - from bs4 import BeautifulSoup
 - from langchain.document_loaders.base import BaseLoader
 - from langchain.schema import Document
 - 
 - logger = logging.getLogger(__name__)
 - 
 - 
 - class HTMLLoader(BaseLoader):
 -     """Load html files.
 - 
 - 
 -     Args:
 -         file_path: Path to the file to load.
 -     """
 - 
 -     def __init__(
 -         self,
 -         file_path: str
 -     ):
 -         """Initialize with file path."""
 -         self._file_path = file_path
 - 
 -     def load(self) -> list[Document]:
 -         return [Document(page_content=self._load_as_text())]
 - 
 -     def _load_as_text(self) -> str:
 -         with open(self._file_path, "rb") as fp:
 -             soup = BeautifulSoup(fp, 'html.parser')
 -             text = soup.get_text()
 -             text = text.strip() if text else ''
 - 
 -         return text
 
 
  |