|
|
|
|
|
|
|
|
for q, a in excel_parser(filename, binary, callback): |
|
|
for q, a in excel_parser(filename, binary, callback): |
|
|
res.append(beAdoc(deepcopy(doc), q, a, eng)) |
|
|
res.append(beAdoc(deepcopy(doc), q, a, eng)) |
|
|
return res |
|
|
return res |
|
|
|
|
|
|
|
|
elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE): |
|
|
elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE): |
|
|
callback(0.1, "Start to parse.") |
|
|
callback(0.1, "Start to parse.") |
|
|
txt = get_text(filename, binary) |
|
|
txt = get_text(filename, binary) |
|
|
|
|
|
|
|
|
f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) |
|
|
f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) |
|
|
|
|
|
|
|
|
return res |
|
|
return res |
|
|
|
|
|
|
|
|
elif re.search(r"\.pdf$", filename, re.IGNORECASE): |
|
|
elif re.search(r"\.pdf$", filename, re.IGNORECASE): |
|
|
callback(0.1, "Start to parse.") |
|
|
callback(0.1, "Start to parse.") |
|
|
pdf_parser = Pdf() |
|
|
pdf_parser = Pdf() |
|
|
qai_list, tbls = pdf_parser(filename if not binary else binary, |
|
|
qai_list, tbls = pdf_parser(filename if not binary else binary, |
|
|
from_page=0, to_page=10000, callback=callback) |
|
|
from_page=0, to_page=10000, callback=callback) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for q, a, image, poss in qai_list: |
|
|
for q, a, image, poss in qai_list: |
|
|
res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss)) |
|
|
res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss)) |
|
|
return res |
|
|
return res |
|
|
|
|
|
|
|
|
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): |
|
|
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): |
|
|
callback(0.1, "Start to parse.") |
|
|
callback(0.1, "Start to parse.") |
|
|
txt = get_text(filename, binary) |
|
|
txt = get_text(filename, binary) |