### What problem does this PR solve?

This small PR resolves the `re` (regex) library warnings shown under Python 3.11:

```python
DeprecationWarning: 'count' is passed as positional argument
```

The affected `re.sub` calls now pass `count` and `flags` as keyword arguments. In one call, `re.IGNORECASE` had been passed positionally in the `count` slot; it is now passed as `flags=`.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>

tags/v0.19.x
| nm = re.sub(r"&amp;", "&", nm) | nm = re.sub(r"&amp;", "&", nm) | ||||
| nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm) | nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm) | ||||
| nm = re.sub( | nm = re.sub( | ||||
| r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE | |||||
| r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, count=10000, flags=re.IGNORECASE | |||||
| ) | ) | ||||
| nm = re.sub( | nm = re.sub( | ||||
| r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", | r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", | ||||
| "", | "", | ||||
| nm, | nm, | ||||
| 10000, | |||||
| re.IGNORECASE, | |||||
| count=10000, | |||||
| flags=re.IGNORECASE, | |||||
| ) | ) | ||||
| if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])): | if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])): | ||||
| return nm | return nm |
| def rmHtmlTag(line): | def rmHtmlTag(line): | ||||
| return re.sub(r"<[a-z0-9.\"=';,:\+_/ -]+>", " ", line, 100000, re.IGNORECASE) | |||||
| return re.sub(r"<[a-z0-9.\"=';,:\+_/ -]+>", " ", line, count=100000, flags=re.IGNORECASE) | |||||
| def highest_degree(dg): | def highest_degree(dg): | ||||
| (r".*国有.*", "国企"), | (r".*国有.*", "国企"), | ||||
| (r"[ ()\(\)人/·0-9-]+", ""), | (r"[ ()\(\)人/·0-9-]+", ""), | ||||
| (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]: | (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]: | ||||
| cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE) | |||||
| cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], count=1000, flags=re.IGNORECASE) | |||||
| if len(cv["corporation_type"]) < 2: | if len(cv["corporation_type"]) < 2: | ||||
| del cv["corporation_type"] | del cv["corporation_type"] | ||||
| type("")) else sections[i][0]).strip() | type("")) else sections[i][0]).strip() | ||||
| if not re.match(r"(contents|目录|目次|table of contents|致谢|acknowledge)$", | if not re.match(r"(contents|目录|目次|table of contents|致谢|acknowledge)$", | ||||
| re.sub(r"( | |\u3000)+", "", get(i).split("@@")[0], re.IGNORECASE)): | |||||
| re.sub(r"( | |\u3000)+", "", get(i).split("@@")[0], flags=re.IGNORECASE)): | |||||
| i += 1 | i += 1 | ||||
| continue | continue | ||||
| sections.pop(i) | sections.pop(i) |