You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

position_helper.py 4.5KB

пре 1 година
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. import os
  2. from collections import OrderedDict
  3. from collections.abc import Callable
  4. from functools import lru_cache
  5. from typing import TypeVar
  6. from configs import dify_config
  7. from core.tools.utils.yaml_utils import load_yaml_file_cached
  8. @lru_cache(maxsize=128)
  9. def get_position_map(folder_path: str, *, file_name: str = "_position.yaml") -> dict[str, int]:
  10. """
  11. Get the mapping from name to index from a YAML file
  12. :param folder_path:
  13. :param file_name: the YAML file name, default to '_position.yaml'
  14. :return: a dict with name as key and index as value
  15. """
  16. # FIXME(-LAN-): Cache position maps to prevent file descriptor exhaustion during high-load benchmarks
  17. position_file_path = os.path.join(folder_path, file_name)
  18. try:
  19. yaml_content = load_yaml_file_cached(file_path=position_file_path)
  20. except Exception:
  21. yaml_content = []
  22. positions = [item.strip() for item in yaml_content if item and isinstance(item, str) and item.strip()]
  23. return {name: index for index, name in enumerate(positions)}
  24. @lru_cache(maxsize=128)
  25. def get_tool_position_map(folder_path: str, file_name: str = "_position.yaml") -> dict[str, int]:
  26. """
  27. Get the mapping for tools from name to index from a YAML file.
  28. :param folder_path:
  29. :param file_name: the YAML file name, default to '_position.yaml'
  30. :return: a dict with name as key and index as value
  31. """
  32. position_map = get_position_map(folder_path, file_name=file_name)
  33. return pin_position_map(
  34. position_map,
  35. pin_list=dify_config.POSITION_TOOL_PINS_LIST,
  36. )
  37. def pin_position_map(original_position_map: dict[str, int], pin_list: list[str]) -> dict[str, int]:
  38. """
  39. Pin the items in the pin list to the beginning of the position map.
  40. Overall logic: exclude > include > pin
  41. :param original_position_map: the position map to be sorted and filtered
  42. :param pin_list: the list of pins to be put at the beginning
  43. :return: the sorted position map
  44. """
  45. positions = sorted(original_position_map.keys(), key=lambda x: original_position_map[x])
  46. # Add pins to position map
  47. position_map = {name: idx for idx, name in enumerate(pin_list)}
  48. # Add remaining positions to position map
  49. start_idx = len(position_map)
  50. for name in positions:
  51. if name not in position_map:
  52. position_map[name] = start_idx
  53. start_idx += 1
  54. return position_map
  55. T = TypeVar("T")
  56. def is_filtered(
  57. include_set: set[str],
  58. exclude_set: set[str],
  59. data: T,
  60. name_func: Callable[[T], str],
  61. ) -> bool:
  62. """
  63. Check if the object should be filtered out.
  64. Overall logic: exclude > include > pin
  65. :param include_set: the set of names to be included
  66. :param exclude_set: the set of names to be excluded
  67. :param name_func: the function to get the name of the object
  68. :param data: the data to be filtered
  69. :return: True if the object should be filtered out, False otherwise
  70. """
  71. if not data:
  72. return False
  73. if not include_set and not exclude_set:
  74. return False
  75. name = name_func(data)
  76. if name in exclude_set: # exclude_set is prioritized
  77. return True
  78. if include_set and name not in include_set: # filter out only if include_set is not empty
  79. return True
  80. return False
  81. def sort_by_position_map(
  82. position_map: dict[str, int],
  83. data: list[T],
  84. name_func: Callable[[T], str],
  85. ):
  86. """
  87. Sort the objects by the position map.
  88. If the name of the object is not in the position map, it will be put at the end.
  89. :param position_map: the map holding positions in the form of {name: index}
  90. :param name_func: the function to get the name of the object
  91. :param data: the data to be sorted
  92. :return: the sorted objects
  93. """
  94. if not position_map or not data:
  95. return data
  96. return sorted(data, key=lambda x: position_map.get(name_func(x), float("inf")))
  97. def sort_to_dict_by_position_map(
  98. position_map: dict[str, int],
  99. data: list[T],
  100. name_func: Callable[[T], str],
  101. ):
  102. """
  103. Sort the objects into a ordered dict by the position map.
  104. If the name of the object is not in the position map, it will be put at the end.
  105. :param position_map: the map holding positions in the form of {name: index}
  106. :param name_func: the function to get the name of the object
  107. :param data: the data to be sorted
  108. :return: an OrderedDict with the sorted pairs of name and object
  109. """
  110. sorted_items = sort_by_position_map(position_map, data, name_func)
  111. return OrderedDict((name_func(item), item) for item in sorted_items)