You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 1.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # Copyright (c) 2024 Microsoft Corporation.
  2. # Licensed under the MIT License
  3. """
  4. Reference:
  5. - [graphrag](https://github.com/microsoft/graphrag)
  6. """
  7. import html
  8. import re
  9. from typing import Any, Callable
# Type of an error-handling callback: it accepts an optional exception, an
# optional string, and an optional dict (in that order) and returns nothing.
# NOTE(review): the string and dict are presumably a message/stack trace and
# extra details — confirm against the callers.
ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
  11. def perform_variable_replacements(
  12. input: str, history: list[dict] | None = None, variables: dict | None = None
  13. ) -> str:
  14. """Perform variable replacements on the input string and in a chat log."""
  15. if history is None:
  16. history = []
  17. if variables is None:
  18. variables = {}
  19. result = input
  20. def replace_all(input: str) -> str:
  21. result = input
  22. for k, v in variables.items():
  23. result = result.replace(f"{{{k}}}", v)
  24. return result
  25. result = replace_all(result)
  26. for i, entry in enumerate(history):
  27. if entry.get("role") == "system":
  28. entry["content"] = replace_all(entry.get("content") or "")
  29. return result
  30. def clean_str(input: Any) -> str:
  31. """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
  32. # If we get non-string input, just give it back
  33. if not isinstance(input, str):
  34. return input
  35. result = html.unescape(input.strip())
  36. # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
  37. return re.sub(r"[\"\x00-\x1f\x7f-\x9f]", "", result)
  38. def dict_has_keys_with_types(
  39. data: dict, expected_fields: list[tuple[str, type]]
  40. ) -> bool:
  41. """Return True if the given dictionary has the given keys with the given types."""
  42. for field, field_type in expected_fields:
  43. if field not in data:
  44. return False
  45. value = data[field]
  46. if not isinstance(value, field_type):
  47. return False
  48. return True