Source code for df_eval.lookup

"""
Lookup functionality with resolvers for external data sources.

This module provides lookup capabilities with various resolvers
(database, HTTP, file) and TTL caching.
"""

import time
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
import pandas as pd


[docs] class Resolver(ABC): """Abstract base class for resolvers."""
[docs] @abstractmethod def resolve(self, key: Any) -> Any: """ Resolve a key to a value. Args: key: The key to resolve. Returns: The resolved value. """ pass
[docs] class CachedResolver(Resolver): """Resolver with TTL cache support."""
[docs] def __init__(self, resolver: Resolver, ttl_seconds: float = 300.0): """ Initialize a cached resolver. Args: resolver: The underlying resolver. ttl_seconds: Time-to-live for cache entries in seconds. """ self.resolver = resolver self.ttl_seconds = ttl_seconds self._cache: Dict[Any, tuple[Any, float]] = {}
[docs] def resolve(self, key: Any) -> Any: """Resolve with caching.""" current_time = time.time() # Check cache if key in self._cache: value, timestamp = self._cache[key] if current_time - timestamp < self.ttl_seconds: return value # Resolve and cache value = self.resolver.resolve(key) self._cache[key] = (value, current_time) return value
[docs] def clear_cache(self) -> None: """Clear the cache.""" self._cache.clear()
[docs] class DictResolver(Resolver): """Simple dictionary-based resolver."""
[docs] def __init__(self, mapping: Dict[Any, Any], default: Any = None): """ Initialize a dictionary resolver. Args: mapping: Dictionary mapping keys to values. default: Default value if key not found. """ self.mapping = mapping self.default = default
[docs] def resolve(self, key: Any) -> Any: """Resolve from dictionary.""" return self.mapping.get(key, self.default)
[docs] class FileResolver(Resolver): """File-based resolver that reads from CSV/JSON files."""
[docs] def __init__(self, filepath: str, key_column: str, value_column: str): """ Initialize a file resolver. Args: filepath: Path to the file. key_column: Name of the key column. value_column: Name of the value column. """ self.filepath = filepath self.key_column = key_column self.value_column = value_column self._data: Optional[pd.DataFrame] = None
def _load_data(self) -> pd.DataFrame: """Load data from file.""" if self._data is None: if self.filepath.endswith('.csv'): self._data = pd.read_csv(self.filepath) elif self.filepath.endswith('.json'): self._data = pd.read_json(self.filepath) else: raise ValueError(f"Unsupported file format: {self.filepath}") return self._data
[docs] def resolve(self, key: Any) -> Any: """Resolve from file.""" data = self._load_data() result = data[data[self.key_column] == key] if len(result) > 0: return result[self.value_column].iloc[0] return None
[docs] class DatabaseResolver(Resolver): """Database resolver (placeholder for SQL database lookups)."""
[docs] def __init__(self, connection_string: str, table: str, key_column: str, value_column: str): """ Initialize a database resolver. Args: connection_string: Database connection string. table: Table name. key_column: Name of the key column. value_column: Name of the value column. """ self.connection_string = connection_string self.table = table self.key_column = key_column self.value_column = value_column
[docs] def resolve(self, key: Any) -> Any: """ Resolve from database. Note: This is a placeholder. Actual implementation would require a database connection library like sqlalchemy. """ raise NotImplementedError("Database resolver requires a database connection library")
[docs] class HTTPResolver(Resolver): """HTTP API resolver (placeholder for REST API lookups)."""
[docs] def __init__(self, base_url: str, key_param: str = "key"): """ Initialize an HTTP resolver. Args: base_url: Base URL for the API. key_param: Query parameter name for the key. """ self.base_url = base_url self.key_param = key_param
[docs] def resolve(self, key: Any) -> Any: """ Resolve from HTTP API. Note: This is a placeholder. Actual implementation would require an HTTP library like requests. """ raise NotImplementedError("HTTP resolver requires an HTTP library like requests")
[docs] def lookup(series: pd.Series, resolver: Resolver, on_missing: str = "null") -> pd.Series: """ Lookup values using a resolver. Args: series: The series containing keys to lookup. resolver: The resolver to use for lookups. on_missing: How to handle missing values ("null", "raise", "keep"). - "null": Return None/NaN for missing values - "raise": Raise an exception for missing values - "keep": Keep the original key value Returns: A series with resolved values. Raises: ValueError: If on_missing is "raise" and a key cannot be resolved. """ results = [] for key in series: try: value = resolver.resolve(key) if value is None and on_missing == "raise": raise ValueError(f"Could not resolve key: {key}") elif value is None and on_missing == "keep": results.append(key) else: results.append(value) except Exception as e: if on_missing == "raise": raise elif on_missing == "keep": results.append(key) else: results.append(None) return pd.Series(results, index=series.index)