ソースを参照

remove duplicated key in mind map (#1809)

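### What problem does this PR solve?

The generated mind map could contain the same key more than once. `be_children` in `graphrag/index.py` now takes a shared `keyset` of keys that have already been emitted and skips any key found in it; the set is seeded with the top-level keys before recursing. The single-root branch is also corrected to pass the root's value (`list(mg.items())[0][1]`) rather than indexing `list(mg.items())[1]`. In `rag/nlp/__init__.py`, `add_chunk` now normalizes a falsy `pos` to an empty string.
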
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.9.0
Kevin Hu 1年前
コミット
2452c5624f
コミッターのメールアドレスに関連付けられたアカウントが存在しません
2個のファイルの変更11行の追加5行の削除
  1. 10
    5
      graphrag/index.py
  2. 1
    0
      rag/nlp/__init__.py

+ 10
- 5
graphrag/index.py ファイルの表示

from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string




def be_children(obj: dict):
def be_children(obj: dict, keyset:set):
arr = [] arr = []
for k,v in obj.items(): for k,v in obj.items():
k = re.sub(r"\*+", "", k) k = re.sub(r"\*+", "", k)
if not k :continue
if not k or k in keyset:continue
keyset.add(k)
arr.append({ arr.append({
"id": k, "id": k,
"children": be_children(v) if isinstance(v, dict) else []
"children": be_children(v, keyset) if isinstance(v, dict) else []
}) })
return arr return arr


mg = mindmap(_chunks).output mg = mindmap(_chunks).output
if not len(mg.keys()): return chunks if not len(mg.keys()): return chunks


if len(mg.keys()) > 1: md_map = {"id": "root", "children": [{"id": re.sub(r"\*+", "", k), "children": be_children(v)} for k,v in mg.items() if isinstance(v, dict) and re.sub(r"\*+", "", k)]}
else: md_map = {"id": re.sub(r"\*+", "", list(mg.keys())[0]), "children": be_children(list(mg.items())[1])}
if len(mg.keys()) > 1:
keyset = set([re.sub(r"\*+", "", k) for k,v in mg.items() if isinstance(v, dict) and re.sub(r"\*+", "", k)])
md_map = {"id": "root", "children": [{"id": re.sub(r"\*+", "", k), "children": be_children(v, keyset)} for k,v in mg.items() if isinstance(v, dict) and re.sub(r"\*+", "", k)]}
else:
k = re.sub(r"\*+", "", list(mg.keys())[0])
md_map = {"id": k, "children": be_children(list(mg.items())[0][1], set([k]))}
print(json.dumps(md_map, ensure_ascii=False, indent=2)) print(json.dumps(md_map, ensure_ascii=False, indent=2))
chunks.append( chunks.append(
{ {

+ 1
- 0
rag/nlp/__init__.py ファイルの表示

def add_chunk(t, pos): def add_chunk(t, pos):
nonlocal cks, tk_nums, delimiter nonlocal cks, tk_nums, delimiter
tnum = num_tokens_from_string(t) tnum = num_tokens_from_string(t)
if not pos: pos = ""
if tnum < 8: if tnum < 8:
pos = "" pos = ""
# Ensure that the length of the merged chunk does not exceed chunk_token_num # Ensure that the length of the merged chunk does not exceed chunk_token_num

読み込み中…
キャンセル
保存