|
|
|
|
|
|
|
|
from typing import Optional |
|
|
from typing import Optional |
|
|
|
|
|
|
|
|
from core.rag.extractor.extractor_base import BaseExtractor |
|
|
from core.rag.extractor.extractor_base import BaseExtractor |
|
|
|
|
|
from core.rag.extractor.helpers import detect_file_encodings |
|
|
from core.rag.models.document import Document |
|
|
from core.rag.models.document import Document |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs = self._read_from_file(csvfile) |
|
|
docs = self._read_from_file(csvfile) |
|
|
except UnicodeDecodeError as e: |
|
|
except UnicodeDecodeError as e: |
|
|
if self._autodetect_encoding: |
|
|
if self._autodetect_encoding: |
|
|
detected_encodings = detect_filze_encodings(self._file_path) |
|
|
|
|
|
|
|
|
detected_encodings = detect_file_encodings(self._file_path) |
|
|
for encoding in detected_encodings: |
|
|
for encoding in detected_encodings: |
|
|
try: |
|
|
try: |
|
|
with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile: |
|
|
with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile: |