Source code for scripts.utils.logging

"""
Centralized Logging Module
===========================

Comprehensive logging system with custom SUCCESS level, dual output (file + console),
and intelligent filtering. Features timestamped files and automatic log directory creation.

New in v0.0.12:
- Enhanced verbose logging with detailed formatter
- VerboseLogger class for tree-view, step-by-step debugging
- Better error context and stack traces
- Timing information for all operations

Key Features
------------
- **Custom SUCCESS Level**: Between INFO and WARNING for successful operations
- **Dual Output**: Console (clean) and file (detailed) with independent filtering
- **Verbose Mode**: Tree-view logging with context managers (DEBUG level)
- **Timestamped Logs**: Automatic log file creation in ``.logs/`` directory
- **UTF-8 Support**: International character encoding
- **Progress Bar Integration**: Works seamlessly with tqdm

Log Levels
----------
- DEBUG (10): Verbose mode only - detailed file processing, timing, metrics
- INFO (20): Default - major steps and summaries
- SUCCESS (25): Custom level - successful completions (console + file)
- WARNING (30): Potential issues
- ERROR (40): Failures
- CRITICAL (50): Fatal errors

Console vs. File Output
-----------------------
- **Console**: Only SUCCESS, ERROR, and CRITICAL (keeps terminal clean)
- **File**: INFO or DEBUG (depending on --verbose flag) and above

Verbose Logging (VerboseLogger)
--------------------------------
The VerboseLogger class provides tree-view formatted output for detailed debugging:

Usage Example:
    >>> from scripts.utils import logging as log
    >>> vlog = log.get_verbose_logger()
    >>> 
    >>> with vlog.file_processing("data.xlsx", total_records=100):
    ...     vlog.metric("Total rows", 100)
    ...     with vlog.step("Loading data"):
    ...         vlog.detail("Reading sheet 1...")
    ...         vlog.timing("Load time", 0.45)

Output Format:
    ├─ Processing: data.xlsx (100 records)
    │  ├─ Total rows: 100
    │  ├─ Loading data
    │  │  │  Reading sheet 1...
    │  │  │  ⏱ Load time: 0.45s
    │  └─ ✓ Complete

VerboseLogger Methods:
    - file_processing(filename, total_records): Context manager for file-level operations
    - step(step_name): Context manager for processing steps
    - detail(message): Log detailed information
    - metric(label, value): Log metrics/statistics
    - timing(operation, seconds): Log operation timing
    - items_list(label, items, max_show): Log lists with truncation

Integration
-----------
Used by all pipeline modules:
    - scripts/load_dictionary.py: Sheet and table processing
    - scripts/extract_data.py: File extraction and duplicate column removal
    - scripts/deidentify.py: De-identification and validation

See Also
--------
- User Guide: docs/sphinx/user_guide/usage.rst (Verbose Logging Details section)
- Developer Guide: docs/sphinx/developer_guide/architecture.rst (Logging System section)
"""
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Any

SUCCESS = 25
logging.addLevelName(SUCCESS, "SUCCESS")

_logger: Optional[logging.Logger] = None
_log_file_path: Optional[str] = None

[docs] class CustomFormatter(logging.Formatter): """Custom log formatter that properly handles the SUCCESS log level."""
[docs] def format(self, record): if record.levelno == SUCCESS: record.levelname = "SUCCESS" return super().format(record)
[docs] def setup_logger(name: str = "reportalin", log_level: int = logging.INFO, simple_mode: bool = False) -> logging.Logger: """Set up central logger with file and console handlers. Args: name: Logger name log_level: Logging level (DEBUG, INFO, WARNING, etc.) simple_mode: If True, minimal console output (success/errors only) Note: This function is idempotent - if called multiple times, it returns the same logger instance. Parameters from subsequent calls are ignored. To reconfigure, manually reset the global _logger variable. """ global _logger, _log_file_path if _logger is not None: # Log a debug message if parameters differ from initial setup if _logger.level != log_level: _logger.debug(f"setup_logger called with different log_level ({log_level}), but logger already initialized with level {_logger.level}") return _logger _logger = logging.getLogger(name) _logger.setLevel(log_level) _logger.handlers.clear() logs_dir = Path(__file__).parents[2] / ".logs" logs_dir.mkdir(exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = logs_dir / f"{name}_{timestamp}.log" _log_file_path = str(log_file) # Use detailed format for verbose (DEBUG) logging if log_level == logging.DEBUG: file_formatter = CustomFormatter( '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s' ) else: file_formatter = CustomFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_handler = logging.FileHandler(log_file) file_handler.setLevel(log_level) file_handler.setFormatter(file_formatter) # Console handler: behavior depends on simple_mode console_handler = logging.StreamHandler(sys.stdout) if simple_mode: # Simple mode: only show SUCCESS, WARNING, ERROR, and CRITICAL console_handler.setLevel(logging.WARNING) console_handler.setFormatter(CustomFormatter('%(levelname)s: %(message)s')) class SimpleFilter(logging.Filter): """Allow SUCCESS (25), WARNING (30), ERROR (40), and CRITICAL (50).""" def filter(self, record: logging.LogRecord) -> bool: return record.levelno == SUCCESS or record.levelno >= logging.WARNING console_handler.addFilter(SimpleFilter()) else: # Default mode: Show only SUCCESS, ERROR, and CRITICAL (suppress DEBUG, INFO, WARNING) console_handler.setLevel(logging.ERROR) console_handler.setFormatter(CustomFormatter('%(levelname)s: %(message)s')) class SuccessOrErrorFilter(logging.Filter): """Allow SUCCESS (25), ERROR (40), and CRITICAL (50) but suppress WARNING (30).""" def filter(self, record: logging.LogRecord) -> bool: return record.levelno == SUCCESS or record.levelno >= logging.ERROR console_handler.addFilter(SuccessOrErrorFilter()) _logger.addHandler(file_handler) _logger.addHandler(console_handler) _logger.info(f"Logging initialized. Log file: {log_file}") return _logger
[docs] def get_logger(name: Optional[str] = None) -> logging.Logger: """Get the configured logger instance or set it up if not already done. Args: name: Logger name (optional). Accepted for API compatibility with standard Python logging patterns (e.g., ``logging.getLogger(__name__)``), but currently ignored as this returns a singleton logger instance shared by all modules. Returns: The global 'reportalin' logger instance Note: This function implements a singleton pattern - all modules share the same logger instance. The name parameter is accepted for compatibility with standard Python logging but is currently ignored. A debug message is logged if a name is provided and the logger is already initialized. Examples: Standard usage (singleton pattern):: from scripts.utils import get_logger logger = get_logger() logger.info("Processing data...") Compatible with Python logging pattern:: logger = get_logger(__name__) # Works but returns singleton logger logger.info("Message logged") See Also: :func:`setup_logger` - Configure the singleton logger with custom settings """ # Log a debug message if name is provided (helps users understand singleton behavior) if name is not None and _logger is not None: _logger.debug( f"get_logger(name='{name}') called but returning singleton logger instance. " f"All modules share the same 'reportalin' logger." ) return _logger if _logger else setup_logger()
[docs] def get_log_file_path() -> Optional[str]: """Get the path to the current log file.""" return _log_file_path
def _append_log_path(msg: str, include_log_path: bool) -> str: """Helper function to append log file path to error/warning messages.""" if include_log_path and get_log_file_path(): return f"{msg}\nFor more details, check the log file at: {get_log_file_path()}" return msg
[docs] def debug(msg: str, *args: Any, **kwargs: Any) -> None: """Log a DEBUG level message.""" get_logger().debug(msg, *args, **kwargs)
[docs] def info(msg: str, *args: Any, **kwargs: Any) -> None: """Log an INFO level message.""" get_logger().info(msg, *args, **kwargs)
[docs] def warning(msg: str, *args: Any, include_log_path: bool = False, **kwargs: Any) -> None: """Log a WARNING level message.""" get_logger().warning(_append_log_path(msg, include_log_path), *args, **kwargs)
[docs] def error(msg: str, *args: Any, include_log_path: bool = True, **kwargs: Any) -> None: """Log an ERROR level message with optional log file path.""" get_logger().error(_append_log_path(msg, include_log_path), *args, **kwargs)
[docs] def critical(msg: str, *args: Any, include_log_path: bool = True, **kwargs: Any) -> None: """Log a CRITICAL level message with optional log file path.""" get_logger().critical(_append_log_path(msg, include_log_path), *args, **kwargs)
[docs] def success(msg: str, *args: Any, **kwargs: Any) -> None: """Log a SUCCESS level message (custom level 25).""" get_logger().log(SUCCESS, msg, *args, **kwargs)
# Add success method to Logger class properly def _success_method(self: logging.Logger, msg: str, *args: Any, **kwargs: Any) -> None: """Custom success logging method for Logger instances.""" if self.isEnabledFor(SUCCESS): self.log(SUCCESS, msg, *args, **kwargs) logging.Logger.success = _success_method # type: ignore[attr-defined] # Verbose Logging Utilities (for detailed debugging across all steps) # ====================================================================
[docs] class VerboseLogger: """Centralized verbose logging for detailed output in DEBUG mode. Provides formatted tree-view output for file processing, step execution, and operation timing. Only logs when logger is in DEBUG mode. Usage: vlog = VerboseLogger(log) with vlog.file_processing("file.xlsx", total_records=412): with vlog.step("Processing step"): vlog.detail("Details here") """
[docs] def __init__(self, logger_module): """Initialize with logger module.""" self.log = logger_module self._indent = 0
def _is_verbose(self) -> bool: """Check if verbose (DEBUG) logging is enabled.""" return get_logger().level == logging.DEBUG def _log_tree(self, prefix: str, message: str) -> None: """Log with tree-view formatting.""" if self._is_verbose(): indent = " " * self._indent self.log.debug(f"{indent}{prefix}{message}") class _ContextManager: """Context manager for tree-view logging blocks.""" def __init__(self, vlog: 'VerboseLogger', prefix: str, header: str, footer: str = None): self.vlog = vlog self.prefix = prefix self.header = header self.footer = footer def __enter__(self): self.vlog._log_tree(self.prefix, self.header) self.vlog._indent += 1 return self def __exit__(self, *args): self.vlog._indent -= 1 if self.footer: # Use └─ for footer instead of ├─ to show the final item self.vlog._log_tree("└─ ", self.footer)
[docs] def file_processing(self, filename: str, total_records: int = None): """Context manager for processing a file.""" header = f"Processing: {filename}" if total_records is not None: header += f" ({total_records} records)" return self._ContextManager(self, "├─ ", header, "✓ Complete")
[docs] def step(self, step_name: str): """Context manager for a processing step.""" return self._ContextManager(self, "├─ ", step_name)
[docs] def detail(self, message: str) -> None: """Log a detail message within a step.""" self._log_tree("│ ", message)
[docs] def metric(self, label: str, value: Any) -> None: """Log a metric/statistic.""" self._log_tree("├─ ", f"{label}: {value}")
[docs] def timing(self, operation: str, seconds: float) -> None: """Log operation timing.""" self._log_tree("├─ ", f"⏱ {operation}: {seconds:.2f}s")
[docs] def items_list(self, label: str, items: list, max_show: int = 5) -> None: """Log a list of items with truncation if too long.""" if not self._is_verbose(): return if len(items) <= max_show: self.detail(f"{label}: {', '.join(str(i) for i in items)}") else: self.detail(f"{label}: {', '.join(str(i) for i in items[:max_show])} ... (+{len(items)-max_show} more)")
# Create a global VerboseLogger instance for use across modules _verbose_logger: Optional[VerboseLogger] = None
[docs] def get_verbose_logger() -> VerboseLogger: """Get or create the global VerboseLogger instance.""" global _verbose_logger if _verbose_logger is None: _verbose_logger = VerboseLogger(sys.modules[__name__]) return _verbose_logger