Browse Source

1506 remove duplicated code (#1511)

tags/0.3.30
crazywoola 1 year ago
parent
commit
d0e1ea8f06
No account linked to committer's email address
1 changed files with 10 additions and 31 deletions
  1. 10
    31
      api/core/indexing_runner.py

+ 10
- 31
api/core/indexing_runner.py View File

@@ -89,22 +89,6 @@ class IndexingRunner:
dataset_document.stopped_at = datetime.datetime.utcnow()
db.session.commit()

def format_split_text(self, text: str) -> list:
    """Extract question/answer pairs from ``Q<n>: ... A<n>: ...`` text.

    Each pair starts with a ``Q<digits>:`` marker followed by the question,
    then an ``A<digits>:`` marker followed by the answer. The answer runs
    until the next ``Q<digits>:`` marker or the end of the text, so it may
    span several lines.

    Args:
        text: The raw text containing the numbered Q/A pairs.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts in order of
        appearance. Pairs whose question or answer is empty are skipped.
        Answers are stripped, and whitespace following each newline
        inside an answer is collapsed away.
    """
    # The lookahead requires a full "Q<digits>:" marker so that a capital
    # "Q" inside an answer does not end it early. re.MULTILINE is
    # deliberately not used: with it, "$" matches at every line end and the
    # lazy answer group would stop after the first line.
    regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
    matches = re.findall(regex, text)

    return [
        {
            "question": question,
            "answer": re.sub(r"\n\s*", "\n", answer.strip()),
        }
        for question, answer in matches
        if question and answer
    ]

def run_in_splitting_status(self, dataset_document: DatasetDocument):
"""Run the indexing process when the index_status is splitting."""
try:
@@ -647,21 +631,16 @@ class IndexingRunner:
return text

def format_split_text(self, text: str) -> list:
    """Parse ``Q<n>: ... A<n>: ...`` formatted text into Q/A dictionaries.

    A question begins after a ``Q<digits>:`` marker and an answer begins
    after the following ``A<digits>:`` marker. The answer extends up to the
    next ``Q<digits>:`` marker or the end of the text, so multi-line
    answers are kept whole.

    Args:
        text: The raw text containing the numbered Q/A pairs.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts in order of
        appearance. A pair is kept only when both its question and its
        answer are non-empty. Each answer is stripped, and any whitespace
        directly after a newline inside it is removed.
    """
    # Pattern for one Q/A pair. The lookahead needs a complete "Q<digits>:"
    # marker (not just a bare "Q") so answers containing a capital "Q" are
    # not cut short, and re.MULTILINE is omitted so "$" only matches at the
    # end of the text rather than at the end of every line.
    regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
    matches = re.findall(regex, text)

    return [
        {
            "question": q,
            "answer": re.sub(r"\n\s*", "\n", a.strip()),
        }
        for q, a in matches
        if q and a
    ]

def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None:
"""

Loading…
Cancel
Save