選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

synonym.py 2.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import logging
  17. import json
  18. import os
  19. import time
  20. import re
  21. from nltk.corpus import wordnet
  22. from api.utils.file_utils import get_project_base_directory
  23. class Dealer:
  24. def __init__(self, redis=None):
  25. self.lookup_num = 100000000
  26. self.load_tm = time.time() - 1000000
  27. self.dictionary = None
  28. path = os.path.join(get_project_base_directory(), "rag/res", "synonym.json")
  29. try:
  30. self.dictionary = json.load(open(path, 'r'))
  31. except Exception:
  32. logging.warning("Missing synonym.json")
  33. self.dictionary = {}
  34. if not redis:
  35. logging.warning(
  36. "Realtime synonym is disabled, since no redis connection.")
  37. if not len(self.dictionary.keys()):
  38. logging.warning("Fail to load synonym")
  39. self.redis = redis
  40. self.load()
  41. def load(self):
  42. if not self.redis:
  43. return
  44. if self.lookup_num < 100:
  45. return
  46. tm = time.time()
  47. if tm - self.load_tm < 3600:
  48. return
  49. self.load_tm = time.time()
  50. self.lookup_num = 0
  51. d = self.redis.get("kevin_synonyms")
  52. if not d:
  53. return
  54. try:
  55. d = json.loads(d)
  56. self.dictionary = d
  57. except Exception as e:
  58. logging.error("Fail to load synonym!" + str(e))
  59. def lookup(self, tk):
  60. if re.match(r"[a-z]+$", tk):
  61. res = list(set([re.sub("_", " ", syn.name().split(".")[0]) for syn in wordnet.synsets(tk)]) - set([tk]))
  62. return [t for t in res if t]
  63. self.lookup_num += 1
  64. self.load()
  65. res = self.dictionary.get(re.sub(r"[ \t]+", " ", tk.lower()), [])
  66. if isinstance(res, str):
  67. res = [res]
  68. return res
  69. if __name__ == '__main__':
  70. dl = Dealer()
  71. print(dl.dictionary)