You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import secrets
  2. import struct
  3. import time
  4. import uuid
  5. # Reference for UUIDv7 specification:
  6. # RFC 9562, Section 5.7 - https://www.rfc-editor.org/rfc/rfc9562.html#section-5.7
  7. # Define the format for packing the timestamp as an unsigned 64-bit integer (big-endian).
  8. #
  9. # For details on the `struct.pack` format, refer to:
  10. # https://docs.python.org/3/library/struct.html#byte-order-size-and-alignment
  11. _PACK_TIMESTAMP = ">Q"
  12. # Define the format for packing the 12-bit random data A (as specified in RFC 9562 Section 5.7)
  13. # into an unsigned 16-bit integer (big-endian).
  14. _PACK_RAND_A = ">H"
  15. def _create_uuidv7_bytes(timestamp_ms: int, random_bytes: bytes) -> bytes:
  16. """Create UUIDv7 byte structure with given timestamp and random bytes.
  17. This is a private helper function that handles the common logic for creating
  18. UUIDv7 byte structure according to RFC 9562 specification.
  19. UUIDv7 Structure:
  20. - 48 bits: timestamp (milliseconds since Unix epoch)
  21. - 12 bits: random data A (with version bits)
  22. - 62 bits: random data B (with variant bits)
  23. The function performs the following operations:
  24. 1. Creates a 128-bit (16-byte) UUID structure
  25. 2. Packs the timestamp into the first 48 bits (6 bytes)
  26. 3. Sets the version bits to 7 (0111) in the correct position
  27. 4. Sets the variant bits to 10 (binary) in the correct position
  28. 5. Fills the remaining bits with the provided random bytes
  29. Args:
  30. timestamp_ms: The timestamp in milliseconds since Unix epoch (48 bits).
  31. random_bytes: Random bytes to use for the random portions (must be 10 bytes).
  32. First 2 bytes are used for random data A (12 bits after version).
  33. Last 8 bytes are used for random data B (62 bits after variant).
  34. Returns:
  35. A 16-byte bytes object representing the complete UUIDv7 structure.
  36. Note:
  37. This function assumes the random_bytes parameter is exactly 10 bytes.
  38. The caller is responsible for providing appropriate random data.
  39. """
  40. # Create the 128-bit UUID structure
  41. uuid_bytes = bytearray(16)
  42. # Pack timestamp (48 bits) into first 6 bytes
  43. uuid_bytes[0:6] = struct.pack(_PACK_TIMESTAMP, timestamp_ms)[2:8] # Take last 6 bytes of 8-byte big-endian
  44. # Next 16 bits: random data A (12 bits) + version (4 bits)
  45. # Take first 2 random bytes and set version to 7
  46. rand_a = struct.unpack(_PACK_RAND_A, random_bytes[0:2])[0]
  47. # Clear the highest 4 bits to make room for the version field
  48. # by performing a bitwise AND with 0x0FFF (binary: 0b0000_1111_1111_1111).
  49. rand_a = rand_a & 0x0FFF
  50. # Set the version field to 7 (binary: 0111) by performing a bitwise OR with 0x7000 (binary: 0b0111_0000_0000_0000).
  51. rand_a = rand_a | 0x7000
  52. uuid_bytes[6:8] = struct.pack(_PACK_RAND_A, rand_a)
  53. # Last 64 bits: random data B (62 bits) + variant (2 bits)
  54. # Use remaining 8 random bytes and set variant to 10 (binary)
  55. uuid_bytes[8:16] = random_bytes[2:10]
  56. # Set variant bits (first 2 bits of byte 8 should be '10')
  57. uuid_bytes[8] = (uuid_bytes[8] & 0x3F) | 0x80 # Set variant to 10xxxxxx
  58. return bytes(uuid_bytes)
  59. def uuidv7(timestamp_ms: int | None = None) -> uuid.UUID:
  60. """Generate a UUID version 7 according to RFC 9562 specification.
  61. UUIDv7 features a time-ordered value field derived from the widely
  62. implemented and well known Unix Epoch timestamp source, the number of
  63. milliseconds since midnight 1 Jan 1970 UTC, leap seconds excluded.
  64. Structure:
  65. - 48 bits: timestamp (milliseconds since Unix epoch)
  66. - 12 bits: random data A (with version bits)
  67. - 62 bits: random data B (with variant bits)
  68. Args:
  69. timestamp_ms: The timestamp used when generating UUID, use the current time if unspecified.
  70. Should be an integer representing milliseconds since Unix epoch.
  71. Returns:
  72. A UUID object representing a UUIDv7.
  73. Example:
  74. >>> import time
  75. >>> # Generate UUIDv7 with current time
  76. >>> uuid_current = uuidv7()
  77. >>> # Generate UUIDv7 with specific timestamp
  78. >>> uuid_specific = uuidv7(int(time.time() * 1000))
  79. """
  80. if timestamp_ms is None:
  81. timestamp_ms = int(time.time() * 1000)
  82. # Generate 10 random bytes for the random portions
  83. random_bytes = secrets.token_bytes(10)
  84. # Create UUIDv7 bytes using the helper function
  85. uuid_bytes = _create_uuidv7_bytes(timestamp_ms, random_bytes)
  86. return uuid.UUID(bytes=uuid_bytes)
  87. def uuidv7_timestamp(id_: uuid.UUID) -> int:
  88. """Extract the timestamp from a UUIDv7.
  89. UUIDv7 contains a 48-bit timestamp field representing milliseconds since
  90. the Unix epoch (1970-01-01 00:00:00 UTC). This function extracts and
  91. returns that timestamp as an integer representing milliseconds since the epoch.
  92. Args:
  93. id_: A UUID object that should be a UUIDv7 (version 7).
  94. Returns:
  95. The timestamp as an integer representing milliseconds since Unix epoch.
  96. Raises:
  97. ValueError: If the provided UUID is not version 7.
  98. Example:
  99. >>> uuid_v7 = uuidv7()
  100. >>> timestamp = uuidv7_timestamp(uuid_v7)
  101. >>> print(f"UUID was created at: {timestamp} ms")
  102. """
  103. # Verify this is a UUIDv7
  104. if id_.version != 7:
  105. raise ValueError(f"Expected UUIDv7 (version 7), got version {id_.version}")
  106. # Extract the UUID bytes
  107. uuid_bytes = id_.bytes
  108. # Extract the first 48 bits (6 bytes) as the timestamp in milliseconds
  109. # Pad with 2 zero bytes at the beginning to make it 8 bytes for unpacking as Q (unsigned long long)
  110. timestamp_bytes = b"\x00\x00" + uuid_bytes[0:6]
  111. ts_in_ms = struct.unpack(_PACK_TIMESTAMP, timestamp_bytes)[0]
  112. # Return timestamp directly in milliseconds as integer
  113. assert isinstance(ts_in_ms, int)
  114. return ts_in_ms
  115. def uuidv7_boundary(timestamp_ms: int) -> uuid.UUID:
  116. """Generate a non-random uuidv7 with the given timestamp (first 48 bits) and
  117. all random bits to 0. As the smallest possible uuidv7 for that timestamp,
  118. it may be used as a boundary for partitions.
  119. """
  120. # Use zero bytes for all random portions
  121. zero_random_bytes = b"\x00" * 10
  122. # Create UUIDv7 bytes using the helper function
  123. uuid_bytes = _create_uuidv7_bytes(timestamp_ms, zero_random_bytes)
  124. return uuid.UUID(bytes=uuid_bytes)