### What problem does this PR solve?

#7623

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

tags/v0.19.0
@@ -15,6 +15,7 @@ | |||
# | |||
import json | |||
import logging | |||
import re | |||
from abc import ABC | |||
import pandas as pd | |||
@@ -59,6 +60,7 @@ class Retrieval(ComponentBase, ABC): | |||
def _run(self, history, **kwargs): | |||
query = self.get_input() | |||
query = str(query["content"][0]) if "content" in query else "" | |||
query = re.split(r"(USER:|ASSISTANT:)", query)[-1] | |||
kb_ids: list[str] = self._param.kb_ids or [] | |||
@@ -210,6 +210,7 @@ def completion(): | |||
"dataset_id": get_value(ck, "kb_id", "dataset_id"), | |||
"image_id": get_value(ck, "image_id", "img_id"), | |||
"positions": get_value(ck, "positions", "position_int"), | |||
"doc_type": get_value(ck, "doc_type_kwd", "doc_type_kwd"), | |||
} | |||
for ck in ref.get("chunks", []) | |||
] |
@@ -117,7 +117,9 @@ def kb_prompt(kbinfos, max_tokens): | |||
doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) | |||
for i, ck in enumerate(kbinfos["chunks"][:chunks_num]): | |||
doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"]) | |||
cnt = f"---\nID: {i}\n" + (f"URL: {ck['url']}\n" if "url" in ck else "") | |||
cnt += ck["content_with_weight"] | |||
doc2chunks[ck["docnm_kwd"]]["chunks"].append(cnt) | |||
doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) | |||
knowledges = [] |