|
|
|
@@ -1,4 +1,4 @@ |
|
|
|
# |
|
|
|
# |
|
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. |
|
|
|
# |
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
@@ -158,7 +158,7 @@ class Dealer: |
|
|
|
tks.append(t) |
|
|
|
return tks |
|
|
|
|
|
|
|
def weights(self, tks): |
|
|
|
def weights(self, tks, preprocess=True): |
|
|
|
def skill(t): |
|
|
|
if t not in self.sk: |
|
|
|
return 1 |
|
|
|
@@ -222,14 +222,20 @@ class Dealer: |
|
|
|
def idf(s, N): return math.log10(10 + ((N - s + 0.5) / (s + 0.5))) |
|
|
|
|
|
|
|
tw = [] |
|
|
|
for tk in tks: |
|
|
|
tt = self.tokenMerge(self.pretoken(tk, True)) |
|
|
|
idf1 = np.array([idf(freq(t), 10000000) for t in tt]) |
|
|
|
idf2 = np.array([idf(df(t), 1000000000) for t in tt]) |
|
|
|
if not preprocess: |
|
|
|
idf1 = np.array([idf(freq(t), 10000000) for t in tks]) |
|
|
|
idf2 = np.array([idf(df(t), 1000000000) for t in tks]) |
|
|
|
wts = (0.3 * idf1 + 0.7 * idf2) * \ |
|
|
|
np.array([ner(t) * postag(t) for t in tt]) |
|
|
|
|
|
|
|
tw.extend(zip(tt, wts)) |
|
|
|
np.array([ner(t) * postag(t) for t in tks]) |
|
|
|
tw = zip(tks, wts) |
|
|
|
else: |
|
|
|
for tk in tks: |
|
|
|
tt = self.tokenMerge(self.pretoken(tk, True)) |
|
|
|
idf1 = np.array([idf(freq(t), 10000000) for t in tt]) |
|
|
|
idf2 = np.array([idf(df(t), 1000000000) for t in tt]) |
|
|
|
wts = (0.3 * idf1 + 0.7 * idf2) * \ |
|
|
|
np.array([ner(t) * postag(t) for t in tt]) |
|
|
|
tw.extend(zip(tt, wts)) |
|
|
|
|
|
|
|
S = np.sum([s for _, s in tw]) |
|
|
|
return [(t, s / S) for t, s in tw] |