### What problem does this PR solve?

#7623

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

tags/v0.19.0
| @@ -15,6 +15,7 @@ | |||
| # | |||
| import json | |||
| import logging | |||
| import re | |||
| from abc import ABC | |||
| import pandas as pd | |||
| @@ -59,6 +60,7 @@ class Retrieval(ComponentBase, ABC): | |||
| def _run(self, history, **kwargs): | |||
| query = self.get_input() | |||
| query = str(query["content"][0]) if "content" in query else "" | |||
| query = re.split(r"(USER:|ASSISTANT:)", query)[-1] | |||
| kb_ids: list[str] = self._param.kb_ids or [] | |||
| @@ -210,6 +210,7 @@ def completion(): | |||
| "dataset_id": get_value(ck, "kb_id", "dataset_id"), | |||
| "image_id": get_value(ck, "image_id", "img_id"), | |||
| "positions": get_value(ck, "positions", "position_int"), | |||
| "doc_type": get_value(ck, "doc_type_kwd", "doc_type_kwd"), | |||
| } | |||
| for ck in ref.get("chunks", []) | |||
| ] | |||
| @@ -117,7 +117,9 @@ def kb_prompt(kbinfos, max_tokens): | |||
| doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) | |||
| for i, ck in enumerate(kbinfos["chunks"][:chunks_num]): | |||
| doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"]) | |||
| cnt = f"---\nID: {i}\n" + (f"URL: {ck['url']}\n" if "url" in ck else "") | |||
| cnt += ck["content_with_weight"] | |||
| doc2chunks[ck["docnm_kwd"]]["chunks"].append(cnt) | |||
| doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) | |||
| knowledges = [] | |||