| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- import re
-
-
def callback__(progress, msg, func):
    """Forward a progress report to an optional callback.

    Args:
        progress: Progress value passed through unchanged to ``func``.
        msg: Message passed through unchanged to ``func``.
        func: Callable taking ``(progress, msg)``. Any falsy value
            (e.g. ``None``) means there is no listener and nothing is called.

    Returns:
        None. Whatever ``func`` returns is discarded.
    """
    if func:
        func(progress, msg)
-
-
# Families of heading/bullet regexes. Each inner list is one numbering
# convention; the position of the inner list is the category index.
# NOTE(review): the `{,n}` quantifiers accept zero digits, so e.g. the first
# numeric pattern also matches text that merely starts with '.', ' ' or '、'.
# Confirm whether that is intended before tightening them.
BULLET_PATTERN = [
    # 0: Chinese ordinal headings ("第…编/章/节/条") and parenthesised
    #    Chinese numerals.
    [
        r"第[零一二三四五六七八九十百]+编",
        r"第[零一二三四五六七八九十百]+章",
        r"第[零一二三四五六七八九十百]+节",
        r"第[零一二三四五六七八九十百]+条",
        r"[\((][零一二三四五六七八九十百]+[\))]",
    ],
    # 1: Arabic-numeral outlines: "1.", "1.1", "1.1.1", "1.1.1.1".
    [
        r"[0-9]{,3}[\. 、]",
        r"[0-9]{,2}\.[0-9]{,2}",
        r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
        r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
    ],
    # 2: Bare Chinese numerals followed by a separator, then parenthesised
    #    Chinese numerals, then parenthesised Arabic numerals.
    [
        r"[零一二三四五六七八九十百]+[ 、]",
        r"[\((][零一二三四五六七八九十百]+[\))]",
        r"[\((][0-9]{,2}[\))]",
    ],
    # 3: English headings: PART / Chapter / Section / Article.
    [
        r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)",
        r"Chapter (I+V?|VI*|XI|IX|X)",
        r"Section [0-9]+",
        r"Article [0-9]+",
    ],
]
-
-
def bullets_category(sections):
    """Pick the BULLET_PATTERN group that matches the most sections.

    A section counts once for a group if any regex in that group matches at
    the start of the section text (``re.match``).

    Args:
        sections: Iterable of strings to test against each pattern group.

    Returns:
        Index into ``BULLET_PATTERN`` of the group with the highest number of
        matching sections. Ties go to the lowest index. Returns -1 when no
        section matches any group.
    """
    best_index = -1
    best_hits = 0
    for index, patterns in enumerate(BULLET_PATTERN):
        hit_count = sum(
            1
            for text in sections
            if any(re.match(pattern, text) for pattern in patterns)
        )
        # Strict comparison keeps the earliest group on ties and leaves the
        # result at -1 when nothing matched at all.
        if hit_count > best_hits:
            best_index = index
            best_hits = hit_count
    return best_index
|