Source code for neosqlite.collection.text_search

"""
Enhanced text search functionality for NeoSQLite with international character support.
"""

import logging
import re
import unicodedata
from functools import cache, lru_cache
from typing import Any

logger = logging.getLogger(__name__)


class TextSearchOptimizer:
    """
    Optimize text search operations with caching and Unicode support.

    This class provides optimized text search functionality with:
    - LRU caching for compiled regex patterns
    - Unicode normalization for international character support
    - Diacritic-insensitive matching
    - Case-insensitive searching

    Every method is a ``staticmethod``; the class is used purely as a
    namespace and holds no per-instance state.
    """

    # Upper bound on the number of compiled patterns each cache keeps.
    # Search terms form an unbounded key space, so an unbounded cache would
    # grow for the lifetime of the process; least-recently-used entries are
    # evicted once this limit is reached.
    _PATTERN_CACHE_SIZE = 1024

    @staticmethod
    @lru_cache(maxsize=_PATTERN_CACHE_SIZE)
    def compile_pattern(search_term: str):
        """
        Compile and cache regex patterns for better performance.

        The search term is escaped before compilation, so it is matched
        literally (regex metacharacters in it have no special meaning).
        Matching is case-insensitive.

        Args:
            search_term: The term to search for

        Returns:
            Compiled regex pattern or None if compilation fails
        """
        try:
            return re.compile(
                re.escape(search_term), re.IGNORECASE | re.UNICODE
            )
        except re.error as e:
            # Lazy %-formatting: the message is only built if DEBUG is on.
            logger.debug("Regex compilation failed: %s", e)
            return None

    @staticmethod
    def normalize_text(text: str) -> str:
        """
        Normalize text for comparison by removing diacritics.

        The text is lower-cased, decomposed to NFD form, and every
        character in Unicode category ``Mn`` (nonspacing mark) is dropped.

        Args:
            text: Text to normalize

        Returns:
            Normalized text with diacritics removed
        """
        # Normalize to decomposed form (NFD) so that accents become separate
        # combining code points following their base character.
        decomposed = unicodedata.normalize("NFD", text.lower())
        # Remove combining characters (diacritics)
        return "".join(
            ch for ch in decomposed if unicodedata.category(ch) != "Mn"
        )

    @staticmethod
    @lru_cache(maxsize=_PATTERN_CACHE_SIZE)
    def get_normalized_pattern(search_term: str):
        """
        Get normalized pattern for diacritic-insensitive matching.

        The search term is passed through ``normalize_text`` and then
        escaped, so the resulting pattern matches the normalized term
        literally. It is intended to be applied to text that has been
        normalized the same way.

        Args:
            search_term: The term to search for

        Returns:
            Compiled regex pattern for normalized text or None if
            compilation fails
        """
        try:
            normalized = TextSearchOptimizer.normalize_text(search_term)
            return re.compile(
                re.escape(normalized), re.IGNORECASE | re.UNICODE
            )
        except re.error as e:
            # Lazy %-formatting: the message is only built if DEBUG is on.
            logger.debug("Regex compilation failed: %s", e)
            return None
def simple_text_contains(text: str, search_term: str) -> bool:
    """
    Report whether ``search_term`` occurs in ``text``, ignoring case.

    Both values are lower-cased and compared with a plain substring test.
    This is a fast fallback for basic ASCII text matching.

    Args:
        text: The text to search in
        search_term: The term to search for

    Returns:
        True if the text contains the search term, False otherwise
        (including when either argument is not a string)
    """
    both_are_strings = isinstance(text, str) and isinstance(search_term, str)
    if not both_are_strings:
        return False

    needle = search_term.lower()
    haystack = text.lower()
    return needle in haystack