|
|
|
|
|
|
|
|
rag_tokenizer.tradi2simp( |
|
|
rag_tokenizer.tradi2simp( |
|
|
rag_tokenizer.strQ2B( |
|
|
rag_tokenizer.strQ2B( |
|
|
txt.lower()))).strip() |
|
|
txt.lower()))).strip() |
|
|
txt = EsQueryer.rmWWW(txt) |
|
|
|
|
|
|
|
|
|
|
|
if not self.isChinese(txt): |
|
|
if not self.isChinese(txt): |
|
|
|
|
|
txt = EsQueryer.rmWWW(txt) |
|
|
tks = rag_tokenizer.tokenize(txt).split(" ") |
|
|
tks = rag_tokenizer.tokenize(txt).split(" ") |
|
|
tks_w = self.tw.weights(tks) |
|
|
tks_w = self.tw.weights(tks) |
|
|
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w] |
|
|
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w] |
|
|
|
|
|
|
|
|
return False |
|
|
return False |
|
|
return True |
|
|
return True |
|
|
|
|
|
|
|
|
|
|
|
txt = EsQueryer.rmWWW(txt) |
|
|
qs, keywords = [], [] |
|
|
qs, keywords = [], [] |
|
|
for tt in self.tw.split(txt)[:256]: # .split(" "): |
|
|
for tt in self.tw.split(txt)[:256]: # .split(" "): |
|
|
if not tt: |
|
|
if not tt: |