| 12345678910111213141516171819202122 | 
							- from pathlib import Path
 - from typing import Dict
 - 
 - from bs4 import BeautifulSoup
 - from llama_index.readers.file.base_parser import BaseParser
 - 
 - 
 - class HTMLParser(BaseParser):
 -     """HTML parser."""
 - 
 -     def _init_parser(self) -> Dict:
 -         """Init parser."""
 -         return {}
 - 
 -     def parse_file(self, file: Path, errors: str = "ignore") -> str:
 -         """Parse file."""
 -         with open(file, "rb") as fp:
 -             soup = BeautifulSoup(fp, 'html.parser')
 -             text = soup.get_text()
 -             text = text.strip() if text else ''
 - 
 -         return text
 
 
  |