Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

utils.py 2.3KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. """
  17. Reference:
  18. - [graphrag](https://github.com/microsoft/graphrag)
  19. """
  20. import html
  21. import re
  22. from collections.abc import Callable
  23. from typing import Any
  24. ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
  25. def perform_variable_replacements(
  26. input: str, history: list[dict]=[], variables: dict | None ={}
  27. ) -> str:
  28. """Perform variable replacements on the input string and in a chat log."""
  29. result = input
  30. def replace_all(input: str) -> str:
  31. result = input
  32. if variables:
  33. for entry in variables:
  34. result = result.replace(f"{{{entry}}}", variables[entry])
  35. return result
  36. result = replace_all(result)
  37. for i in range(len(history)):
  38. entry = history[i]
  39. if entry.get("role") == "system":
  40. history[i]["content"] = replace_all(entry.get("content") or "")
  41. return result
  42. def clean_str(input: Any) -> str:
  43. """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
  44. # If we get non-string input, just give it back
  45. if not isinstance(input, str):
  46. return input
  47. result = html.unescape(input.strip())
  48. # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
  49. return re.sub(r"[\"\x00-\x1f\x7f-\x9f]", "", result)
  50. def dict_has_keys_with_types(
  51. data: dict, expected_fields: list[tuple[str, type]]
  52. ) -> bool:
  53. """Return True if the given dictionary has the given keys with the given types."""
  54. for field, field_type in expected_fields:
  55. if field not in data:
  56. return False
  57. value = data[field]
  58. if not isinstance(value, field_type):
  59. return False
  60. return True