|
|
|
@@ -140,11 +140,10 @@ class NotionExtractor(BaseExtractor): |
|
|
|
|
|
|
|
def _get_notion_block_data(self, page_id: str) -> list[str]: |
|
|
|
result_lines_arr = [] |
|
|
|
cur_block_id = page_id |
|
|
|
start_cursor = None |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=page_id) |
|
|
|
while True: |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) |
|
|
|
query_dict: dict[str, Any] = {} |
|
|
|
|
|
|
|
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} |
|
|
|
res = requests.request( |
|
|
|
"GET", |
|
|
|
block_url, |
|
|
|
@@ -153,7 +152,7 @@ class NotionExtractor(BaseExtractor): |
|
|
|
"Content-Type": "application/json", |
|
|
|
"Notion-Version": "2022-06-28", |
|
|
|
}, |
|
|
|
json=query_dict |
|
|
|
params=query_dict |
|
|
|
) |
|
|
|
data = res.json() |
|
|
|
for result in data["results"]: |
|
|
|
@@ -191,16 +190,16 @@ class NotionExtractor(BaseExtractor): |
|
|
|
if data["next_cursor"] is None: |
|
|
|
break |
|
|
|
else: |
|
|
|
cur_block_id = data["next_cursor"] |
|
|
|
start_cursor = data["next_cursor"] |
|
|
|
return result_lines_arr |
|
|
|
|
|
|
|
def _read_block(self, block_id: str, num_tabs: int = 0) -> str: |
|
|
|
"""Read a block.""" |
|
|
|
result_lines_arr = [] |
|
|
|
cur_block_id = block_id |
|
|
|
start_cursor = None |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id) |
|
|
|
while True: |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) |
|
|
|
query_dict: dict[str, Any] = {} |
|
|
|
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} |
|
|
|
|
|
|
|
res = requests.request( |
|
|
|
"GET", |
|
|
|
@@ -210,7 +209,7 @@ class NotionExtractor(BaseExtractor): |
|
|
|
"Content-Type": "application/json", |
|
|
|
"Notion-Version": "2022-06-28", |
|
|
|
}, |
|
|
|
json=query_dict |
|
|
|
params=query_dict |
|
|
|
) |
|
|
|
data = res.json() |
|
|
|
if 'results' not in data or data["results"] is None: |
|
|
|
@@ -249,7 +248,7 @@ class NotionExtractor(BaseExtractor): |
|
|
|
if data["next_cursor"] is None: |
|
|
|
break |
|
|
|
else: |
|
|
|
cur_block_id = data["next_cursor"] |
|
|
|
start_cursor = data["next_cursor"] |
|
|
|
|
|
|
|
result_lines = "\n".join(result_lines_arr) |
|
|
|
return result_lines |
|
|
|
@@ -258,10 +257,10 @@ class NotionExtractor(BaseExtractor): |
|
|
|
"""Read table rows.""" |
|
|
|
done = False |
|
|
|
result_lines_arr = [] |
|
|
|
cur_block_id = block_id |
|
|
|
start_cursor = None |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id) |
|
|
|
while not done: |
|
|
|
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) |
|
|
|
query_dict: dict[str, Any] = {} |
|
|
|
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} |
|
|
|
|
|
|
|
res = requests.request( |
|
|
|
"GET", |
|
|
|
@@ -271,7 +270,7 @@ class NotionExtractor(BaseExtractor): |
|
|
|
"Content-Type": "application/json", |
|
|
|
"Notion-Version": "2022-06-28", |
|
|
|
}, |
|
|
|
json=query_dict |
|
|
|
params=query_dict |
|
|
|
) |
|
|
|
data = res.json() |
|
|
|
# get table headers text |
|
|
|
@@ -300,7 +299,7 @@ class NotionExtractor(BaseExtractor): |
|
|
|
done = True |
|
|
|
break |
|
|
|
else: |
|
|
|
cur_block_id = data["next_cursor"] |
|
|
|
start_cursor = data["next_cursor"] |
|
|
|
|
|
|
|
result_lines = "\n".join(result_lines_arr) |
|
|
|
return result_lines |