|
|
|
|
|
|
|
|
for i in range(len(chunks)): |
|
|
for i in range(len(chunks)): |
|
|
tkn_cnt = num_tokens_from_string(chunks[i]) |
|
|
tkn_cnt = num_tokens_from_string(chunks[i]) |
|
|
if cnt+tkn_cnt >= left_token_count and texts: |
|
|
if cnt+tkn_cnt >= left_token_count and texts: |
|
|
threads.append(exe.submit(ext, texts, {"entity_types": entity_types}, callback)) |
|
|
|
|
|
|
|
|
for b in range(0, len(texts), 16): |
|
|
|
|
|
threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) |
|
|
texts = [] |
|
|
texts = [] |
|
|
cnt = 0 |
|
|
cnt = 0 |
|
|
texts.append(chunks[i]) |
|
|
texts.append(chunks[i]) |
|
|
cnt += tkn_cnt |
|
|
cnt += tkn_cnt |
|
|
if texts: |
|
|
if texts: |
|
|
threads.append(exe.submit(ext, texts)) |
|
|
|
|
|
|
|
|
for b in range(0, len(texts), 16): |
|
|
|
|
|
threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) |
|
|
|
|
|
|
|
|
callback(0.5, "Extracting entities.") |
|
|
callback(0.5, "Extracting entities.") |
|
|
graphs = [] |
|
|
graphs = [] |