Browse Source

Optimize the _merge_splits function by iterating over splits and lengths with zip instead of manual index tracking (#25680)

Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
tags/1.9.0
ChasePassion 1 month ago
parent
commit
a3f2c05632
No account linked to committer's email address
1 changed file with 1 addition and 4 deletions
  1. 1
    4
      api/core/rag/splitter/text_splitter.py

+ 1
- 4
api/core/rag/splitter/text_splitter.py View File

docs = [] docs = []
current_doc: list[str] = [] current_doc: list[str] = []
total = 0 total = 0
index = 0
for d in splits:
_len = lengths[index]
for d, _len in zip(splits, lengths):
if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size: if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size:
if total > self._chunk_size: if total > self._chunk_size:
logger.warning( logger.warning(
current_doc = current_doc[1:] current_doc = current_doc[1:]
current_doc.append(d) current_doc.append(d)
total += _len + (separator_len if len(current_doc) > 1 else 0) total += _len + (separator_len if len(current_doc) > 1 else 0)
index += 1
doc = self._join_docs(current_doc, separator) doc = self._join_docs(current_doc, separator)
if doc is not None: if doc is not None:
docs.append(doc) docs.append(doc)

Loading…
Cancel
Save