# Source code for neosqlite.objectid

"""
ObjectId implementation for NeoSQLite that follows MongoDB's specification.

Based on MongoDB's ObjectId specification:
- 4 bytes: timestamp (seconds since Unix epoch)
- 5 bytes: random value (generated once per process)
- 3 bytes: counter (incrementing from a random value)

This implementation provides full compatibility with MongoDB ObjectIds
while being optimized for NeoSQLite's local-only architecture.
"""

from __future__ import annotations

import logging
import os
import random
import threading
import time
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    pass

logger = logging.getLogger(__name__)


[docs] class ObjectId: """ A MongoDB-compatible ObjectId implementation for NeoSQLite. This class generates 12-byte identifiers following MongoDB's specification: - 4 bytes: timestamp (seconds since Unix epoch) - 5 bytes: random value (generated once per process) - 3 bytes: counter (incrementing from a random value) Provides full compatibility with MongoDB ObjectIds while working with NeoSQLite. """ _random_bytes: bytes | None = None _counter: int | None = None _counter_lock = threading.Lock()
[docs] def __init__(self, oid: str | bytes | ObjectId | int | None = None): """ Initialize a new ObjectId. Args: oid: Can be a 12-byte binary representation, a 24-character hex string, another ObjectId instance, an integer (which replaces the timestamp), or None to generate a new ObjectId. Raises: TypeError: If the input type is not supported ValueError: If the input format is invalid """ match oid: case None: # Generate a new ObjectId self._id = self._generate_new_id() case ObjectId(): self._id = oid.binary case str(): if len(oid) != 24: raise ValueError( "ObjectId hex string must be exactly 24 characters" ) try: self._id = bytes.fromhex(oid) except ValueError as e: logger.debug(f"Invalid ObjectId hex string '{oid}': {e}") raise ValueError( "ObjectId hex string contains invalid characters" ) case bytes(): if len(oid) != 12: raise ValueError("ObjectId must be exactly 12 bytes") self._id = oid case int(): # If an integer is provided, it replaces the timestamp part # according to MongoDB specification if oid < 0 or oid > 0xFFFFFFFF: raise ValueError( "Integer timestamp must be between 0 and 0xFFFFFFFF" ) # Generate a new ObjectId but with the provided timestamp self._id = self._generate_new_id_with_timestamp(oid) case _: raise TypeError( "ObjectId must be a string, bytes, ObjectId, int, or None" )
[docs] @classmethod def _generate_new_id(cls) -> bytes: """ Generate a new 12-byte ObjectId value according to MongoDB specification. This method creates a unique 12-byte identifier consisting of: - 4 bytes: timestamp (seconds since Unix epoch) - 5 bytes: random value (generated once per process) - 3 bytes: counter (incrementing from a random value) The method ensures thread safety by using a lock when accessing shared random bytes and counter values. The random bytes are generated once per process, and the counter is incremented for each new ObjectId. Returns: bytes: A new 12-byte ObjectId value """ # Ensure thread safety for random bytes and counter with cls._counter_lock: if cls._random_bytes is None: # Generate random bytes once (5 bytes), as per MongoDB spec cls._random_bytes = os.urandom(5) if cls._counter is None: # Initialize counter with a random value cls._counter = random.randint(0, 0xFFFFFF) # Increment counter and keep only 3 bytes cls._counter = (cls._counter + 1) % 0x1000000 # Build the 12-byte ObjectId according to MongoDB specification: # 4 bytes: timestamp (Unix timestamp, big-endian) timestamp = int(time.time()).to_bytes(4, "big") # 5 bytes: random value (big-endian) random_bytes = cls._random_bytes # 3 bytes: counter (big-endian) counter = cls._counter.to_bytes(3, "big") return timestamp + random_bytes + counter
[docs] @classmethod def _generate_new_id_with_timestamp(cls, timestamp: int) -> bytes: """ Generate a new 12-byte ObjectId value with a specific timestamp according to MongoDB specification. This method creates a unique 12-byte identifier with the provided timestamp and following MongoDB's format: - 4 bytes: provided timestamp (instead of current time) - 5 bytes: random value (generated once per process) - 3 bytes: counter (incrementing from a random value) The method ensures thread safety by using a lock when accessing shared random bytes and counter values. The random bytes are generated once per process, and the counter is incremented for each new ObjectId. Args: timestamp: An integer representing the Unix timestamp to use for the ObjectId Returns: bytes: A new 12-byte ObjectId value with the specified timestamp """ # Ensure thread safety for random bytes and counter with cls._counter_lock: if cls._random_bytes is None: # Generate random bytes once (5 bytes), as per MongoDB spec cls._random_bytes = os.urandom(5) if cls._counter is None: # Initialize counter with a random value cls._counter = random.randint(0, 0xFFFFFF) # Increment counter and keep only 3 bytes cls._counter = (cls._counter + 1) % 0x1000000 # Build the 12-byte ObjectId according to MongoDB specification: # 4 bytes: provided timestamp (big-endian) timestamp_bytes = timestamp.to_bytes(4, "big") # 5 bytes: random value (big-endian) random_bytes = cls._random_bytes # 3 bytes: counter (big-endian) counter = cls._counter.to_bytes(3, "big") return timestamp_bytes + random_bytes + counter
[docs] @classmethod def is_valid(cls, oid: Any) -> bool: """ Check if the given value is a valid ObjectId. Args: oid: Value to validate Returns: True if the value is a valid ObjectId, False otherwise """ match oid: case ObjectId(): return True case str(): if len(oid) != 24: return False try: int(oid, 16) # Try to parse as hex return True except (TypeError, ValueError) as e: logger.debug(f"Invalid ObjectId hex string '{oid}': {e}") return False case bytes(): return len(oid) == 12 case int(): return 0 <= oid <= 0xFFFFFFFF case _: return False
@property def binary(self) -> bytes: """Get the binary representation of this ObjectId.""" return self._id @property def hex(self) -> str: """Get the hexadecimal string representation of this ObjectId.""" return self._id.hex() def __str__(self) -> str: """Return the hexadecimal string representation.""" return self.hex def __repr__(self) -> str: """Return a string representation of this ObjectId.""" return f"ObjectId('{self.hex}')" def __bytes__(self) -> bytes: """Return the binary representation of this ObjectId.""" return self._id def __eq__(self, other: Any) -> bool: """Check equality with another ObjectId.""" match other: case ObjectId(): return self._id == other._id case bytes(): return self._id == other case str(): try: return self._id == bytes.fromhex(other) except ValueError as e: logger.debug( f"Failed to compare ObjectId with invalid hex string '{other}': {e}" ) return False case _: return False def __ne__(self, other: Any) -> bool: """Check inequality with another ObjectId.""" return not self.__eq__(other) def __hash__(self) -> int: """Return a hash value for this ObjectId.""" return hash(self._id)
[docs] def generation_time(self) -> float: """ Get the generation time of this ObjectId as a Unix timestamp. Returns: Unix timestamp of when this ObjectId was created """ # First 4 bytes contain the timestamp timestamp_bytes = self._id[:4] return int.from_bytes(timestamp_bytes, "big")
[docs] def encode_for_storage(self) -> dict: """ Encode the ObjectId for JSON storage compatibility with NeoSQLite. Returns: A dictionary representation for JSON storage """ return { "__neosqlite_objectid__": True, "id": self.hex, }
[docs] @classmethod def decode_from_storage(cls, encoded_data: dict) -> ObjectId: """ Decode an ObjectId from JSON storage format. Args: encoded_data: Dictionary representation from JSON storage Returns: An ObjectId instance """ if ( not isinstance(encoded_data, dict) or "__neosqlite_objectid__" not in encoded_data ): raise ValueError("Invalid encoded ObjectId data") if "id" not in encoded_data: raise ValueError( "Invalid encoded ObjectId data: missing 'id' field" ) return cls(encoded_data["id"])