|
|
|
|
|
|
|
|
file_bin = STORAGE_IMPL.get(bucket, name) |
|
|
file_bin = STORAGE_IMPL.get(bucket, name) |
|
|
do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC") |
|
|
do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC") |
|
|
pages = PdfParser.total_page_number(doc["name"], file_bin) |
|
|
pages = PdfParser.total_page_number(doc["name"], file_bin) |
|
|
page_size = doc["parser_config"].get("task_page_size", 12) |
|
|
|
|
|
|
|
|
page_size = doc["parser_config"].get("task_page_size") or 12 |
|
|
if doc["parser_id"] == "paper": |
|
|
if doc["parser_id"] == "paper": |
|
|
page_size = doc["parser_config"].get("task_page_size", 22) |
|
|
|
|
|
|
|
|
page_size = doc["parser_config"].get("task_page_size") or 22 |
|
|
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC": |
|
|
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC": |
|
|
page_size = 10 ** 9 |
|
|
page_size = 10 ** 9 |
|
|
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)] |
|
|
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)] |