### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

tags/v0.16.0
| ent_df["entity"] = ent_df["entity_name"] | ent_df["entity"] = ent_df["entity_name"] | ||||
| del ent_df["entity_name"] | del ent_df["entity_name"] | ||||
| rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000)) | rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000)) | ||||
| if rela_df.empty: | |||||
| continue | |||||
| rela_df["source"] = rela_df["src_id"] | rela_df["source"] = rela_df["src_id"] | ||||
| rela_df["target"] = rela_df["tgt_id"] | rela_df["target"] = rela_df["tgt_id"] | ||||
| del rela_df["src_id"] | del rela_df["src_id"] |
| tenant_ids = tenant_ids.split(",") | tenant_ids = tenant_ids.split(",") | ||||
| idxnms = [index_name(tid) for tid in tenant_ids] | idxnms = [index_name(tid) for tid in tenant_ids] | ||||
| ty_kwds = [] | ty_kwds = [] | ||||
| ents = [] | |||||
| try: | try: | ||||
| ty_kwds, ents = self.query_rewrite(llm, qst, [index_name(tid) for tid in tenant_ids], kb_ids) | ty_kwds, ents = self.query_rewrite(llm, qst, [index_name(tid) for tid in tenant_ids], kb_ids) | ||||
| logging.info(f"Q: {qst}, Types: {ty_kwds}, Entities: {ents}") | logging.info(f"Q: {qst}, Types: {ty_kwds}, Entities: {ents}") | ||||
| nhop_pathes = defaultdict(dict) | nhop_pathes = defaultdict(dict) | ||||
| for _, ent in ents_from_query.items(): | for _, ent in ents_from_query.items(): | ||||
| nhops = ent.get("n_hop_ents", []) | nhops = ent.get("n_hop_ents", []) | ||||
| if not isinstance(nhops, list): | |||||
| logging.warning(f"Abnormal n_hop_ents: {nhops}") | |||||
| continue | |||||
| for nbr in nhops: | for nbr in nhops: | ||||
| path = nbr["path"] | path = nbr["path"] | ||||
| wts = nbr["weights"] | wts = nbr["weights"] | ||||
| "From Entity": f, | "From Entity": f, | ||||
| "To Entity": t, | "To Entity": t, | ||||
| "Score": "%.2f" % (rel["sim"] * rel["pagerank"]), | "Score": "%.2f" % (rel["sim"] * rel["pagerank"]), | ||||
| "Description": json.loads(ent["description"]).get("description", "") | |||||
| "Description": json.loads(rel["description"]).get("description", "") | |||||
| }) | }) | ||||
| max_token -= num_tokens_from_string(str(relas[-1])) | max_token -= num_tokens_from_string(str(relas[-1])) | ||||
| if max_token <= 0: | if max_token <= 0: |
| break | break | ||||
| id = sres.ids[i] | id = sres.ids[i] | ||||
| chunk = sres.field[id] | chunk = sres.field[id] | ||||
| dnm = chunk["docnm_kwd"] | |||||
| did = chunk["doc_id"] | |||||
| dnm = chunk.get("docnm_kwd", "") | |||||
| did = chunk.get("doc_id", "") | |||||
| position_int = chunk.get("position_int", []) | position_int = chunk.get("position_int", []) | ||||
| d = { | d = { | ||||
| "chunk_id": id, | "chunk_id": id, | ||||
| "content_ltks": chunk["content_ltks"], | "content_ltks": chunk["content_ltks"], | ||||
| "content_with_weight": chunk["content_with_weight"], | "content_with_weight": chunk["content_with_weight"], | ||||
| "doc_id": chunk["doc_id"], | |||||
| "doc_id": did, | |||||
| "docnm_kwd": dnm, | "docnm_kwd": dnm, | ||||
| "kb_id": chunk["kb_id"], | "kb_id": chunk["kb_id"], | ||||
| "important_kwd": chunk.get("important_kwd", []), | "important_kwd": chunk.get("important_kwd", []), |