|
|
|
@@ -71,7 +71,19 @@ class FulltextQueryer: |
|
|
|
txt = otxt |
|
|
|
return txt |
|
|
|
|
|
|
|
@staticmethod
def add_space_between_eng_zh(txt):
    """Separate ASCII-letter tokens from adjacent CJK runs with one space.

    A space is inserted wherever a run of characters in the range
    U+4E00..U+9FA5 directly touches either an ASCII letter or an ASCII
    letters-then-digits token (e.g. ``abc123``), on either side.
    Digits that are not preceded by letters are left attached to the
    CJK text, and text that is already separated is returned unchanged.

    Args:
        txt: The string to process.

    Returns:
        The string with the boundary spaces inserted.
    """
    # Order matters only in that each rule runs on the output of the
    # previous one; the rules are applied exactly in this sequence.
    boundary_rules = (
        # (ENG/ENG+NUM) + ZH: letters followed by digits, then CJK
        r'([A-Za-z]+[0-9]+)([\u4e00-\u9fa5]+)',
        # ENG + ZH: a single letter immediately before CJK
        r'([A-Za-z])([\u4e00-\u9fa5]+)',
        # ZH + (ENG/ENG+NUM): CJK, then letters followed by digits
        r'([\u4e00-\u9fa5]+)([A-Za-z]+[0-9]+)',
        # ZH + ENG: CJK immediately before a single letter
        r'([\u4e00-\u9fa5]+)([A-Za-z])',
    )

    spaced = txt
    for rule in boundary_rules:
        # Re-emit both captured sides with a single space between them.
        spaced = re.sub(rule, r'\1 \2', spaced)
    return spaced
|
|
|
|
|
|
|
def question(self, txt, tbl="qa", min_match: float = 0.6): |
|
|
|
txt = FulltextQueryer.add_space_between_eng_zh(txt) |
|
|
|
txt = re.sub( |
|
|
|
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+", |
|
|
|
" ", |