"""
Centralized Logging Module
===========================
Comprehensive logging system with custom SUCCESS level, dual output (file + console),
and intelligent filtering. Features timestamped files and automatic log directory creation.
New in v0.0.12:
- Enhanced verbose logging with detailed formatter
- VerboseLogger class for tree-view, step-by-step debugging
- Better error context and stack traces
- Timing information for all operations
Key Features
------------
- **Custom SUCCESS Level**: Between INFO and WARNING for successful operations
- **Dual Output**: Console (clean) and file (detailed) with independent filtering
- **Verbose Mode**: Tree-view logging with context managers (DEBUG level)
- **Timestamped Logs**: Automatic log file creation in ``.logs/`` directory
- **UTF-8 Support**: International character encoding
- **Progress Bar Integration**: Works seamlessly with tqdm
Log Levels
----------
- DEBUG (10): Verbose mode only - detailed file processing, timing, metrics
- INFO (20): Default - major steps and summaries
- SUCCESS (25): Custom level - successful completions (console + file)
- WARNING (30): Potential issues
- ERROR (40): Failures
- CRITICAL (50): Fatal errors
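
A minimal usage sketch with the module-level helpers defined below (routing
comments reflect the default, non-verbose configuration)::

    from scripts.utils import logging as log

    log.setup_logger()                    # INFO level, default console filtering
    log.info("Loading input files...")    # file only
    log.success("Extraction complete")    # console + file
    log.error("Missing sheet")            # console + file; appends the log file path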
Console vs. File Output
-----------------------

- **Console**: Only SUCCESS, ERROR, and CRITICAL (keeps terminal clean)
- **File**: INFO or DEBUG (depending on --verbose flag) and above

Verbose Logging (VerboseLogger)
-------------------------------

The VerboseLogger class provides tree-view formatted output for detailed debugging.

Usage Example::

    >>> from scripts.utils import logging as log
    >>> vlog = log.get_verbose_logger()
    >>>
    >>> with vlog.file_processing("data.xlsx", total_records=100):
    ...     vlog.metric("Total rows", 100)
    ...     with vlog.step("Loading data"):
    ...         vlog.detail("Reading sheet 1...")
    ...         vlog.timing("Load time", 0.45)

Output Format::

    ├─ Processing: data.xlsx (100 records)
    │   ├─ Total rows: 100
    │   ├─ Loading data
    │   │   │  Reading sheet 1...
    │   │   │  ⏱ Load time: 0.45s
    │   └─ ✓ Complete
VerboseLogger Methods:

- file_processing(filename, total_records): Context manager for file-level operations
- step(step_name): Context manager for processing steps
- detail(message): Log detailed information
- metric(label, value): Log metrics/statistics
- timing(operation, seconds): Log operation timing
- items_list(label, items, max_show): Log lists with truncation (see the sketch below)
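
For example, ``items_list`` truncates long lists; a sketch of the expected
output (the column names are illustrative)::

    vlog.items_list("Columns", ["id", "age", "sex", "site", "visit", "dob"], max_show=3)
    # logs (DEBUG): Columns: id, age, sex ... (+3 more)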

Integration
-----------

Used by all pipeline modules:

- scripts/load_dictionary.py: Sheet and table processing
- scripts/extract_data.py: File extraction and duplicate column removal
- scripts/deidentify.py: De-identification and validation

See Also
--------

- User Guide: docs/sphinx/user_guide/usage.rst (Verbose Logging Details section)
- Developer Guide: docs/sphinx/developer_guide/architecture.rst (Logging System section)
"""
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Any

SUCCESS = 25
logging.addLevelName(SUCCESS, "SUCCESS")

_logger: Optional[logging.Logger] = None
_log_file_path: Optional[str] = None

def setup_logger(name: str = "reportalin", log_level: int = logging.INFO, simple_mode: bool = False) -> logging.Logger:
"""Set up central logger with file and console handlers.
Args:
name: Logger name
log_level: Logging level (DEBUG, INFO, WARNING, etc.)
simple_mode: If True, minimal console output (success/errors only)
Note:
This function is idempotent - if called multiple times, it returns
the same logger instance. Parameters from subsequent calls are ignored.
To reconfigure, manually reset the global _logger variable.
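
    Example:
        A minimal sketch of typical setup at program start, assuming the
        pipeline's ``--verbose`` flag (mentioned in the module docstring)
        maps to DEBUG::

            logger = setup_logger(log_level=logging.DEBUG)
            logger.debug("Verbose logging enabled")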
"""
global _logger, _log_file_path
if _logger is not None:
# Log a debug message if parameters differ from initial setup
if _logger.level != log_level:
_logger.debug(f"setup_logger called with different log_level ({log_level}), but logger already initialized with level {_logger.level}")
return _logger
_logger = logging.getLogger(name)
_logger.setLevel(log_level)
_logger.handlers.clear()
logs_dir = Path(__file__).parents[2] / ".logs"
    logs_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = logs_dir / f"{name}_{timestamp}.log"
_log_file_path = str(log_file)
    # Use a detailed format for verbose (DEBUG) logging
    if log_level == logging.DEBUG:
        file_formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
        )
    else:
        file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Explicit UTF-8 encoding backs the documented international character support
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(log_level)
    file_handler.setFormatter(file_formatter)
    # Console handler: behavior depends on simple_mode
    console_handler = logging.StreamHandler(sys.stdout)
    if simple_mode:
        # Simple mode: show SUCCESS, WARNING, ERROR, and CRITICAL.
        # The handler level must be SUCCESS (25), not WARNING (30);
        # otherwise SUCCESS records would be dropped before the filter runs.
        console_handler.setLevel(SUCCESS)
        console_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))

        class SimpleFilter(logging.Filter):
            """Allow SUCCESS (25), WARNING (30), ERROR (40), and CRITICAL (50)."""

            def filter(self, record: logging.LogRecord) -> bool:
                return record.levelno == SUCCESS or record.levelno >= logging.WARNING

        console_handler.addFilter(SimpleFilter())
    else:
        # Default mode: show only SUCCESS, ERROR, and CRITICAL
        # (suppress DEBUG, INFO, and WARNING). Again, the handler level
        # must be SUCCESS so that SUCCESS records reach the filter.
        console_handler.setLevel(SUCCESS)
        console_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))

        class SuccessOrErrorFilter(logging.Filter):
            """Allow SUCCESS (25), ERROR (40), and CRITICAL (50); suppress WARNING (30)."""

            def filter(self, record: logging.LogRecord) -> bool:
                return record.levelno == SUCCESS or record.levelno >= logging.ERROR

        console_handler.addFilter(SuccessOrErrorFilter())
_logger.addHandler(file_handler)
_logger.addHandler(console_handler)
_logger.info(f"Logging initialized. Log file: {log_file}")
return _logger
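
# Example: reconfiguring the singleton (e.g., in tests) - a sketch of the
# reset pattern described in setup_logger's Note; the module path follows
# the import shown in the module docstring:
#
#     import scripts.utils.logging as log
#     log._logger = None  # drop the cached instance
#     log.setup_logger(log_level=logging.DEBUG, simple_mode=True)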
def get_logger(name: Optional[str] = None) -> logging.Logger:
"""Get the configured logger instance or set it up if not already done.
Args:
name: Logger name (optional). Accepted for API compatibility with
standard Python logging patterns (e.g., ``logging.getLogger(__name__)``),
but currently ignored as this returns a singleton logger instance
shared by all modules.
Returns:
The global 'reportalin' logger instance
Note:
This function implements a singleton pattern - all modules share the same
logger instance. The name parameter is accepted for compatibility with
standard Python logging but is currently ignored. A debug message is
logged if a name is provided and the logger is already initialized.
Examples:
Standard usage (singleton pattern)::
from scripts.utils import get_logger
logger = get_logger()
logger.info("Processing data...")
Compatible with Python logging pattern::
logger = get_logger(__name__) # Works but returns singleton logger
logger.info("Message logged")
See Also:
:func:`setup_logger` - Configure the singleton logger with custom settings
"""
# Log a debug message if name is provided (helps users understand singleton behavior)
if name is not None and _logger is not None:
_logger.debug(
f"get_logger(name='{name}') called but returning singleton logger instance. "
f"All modules share the same 'reportalin' logger."
)
    return _logger if _logger is not None else setup_logger()
def get_log_file_path() -> Optional[str]:
"""Get the path to the current log file."""
return _log_file_path
def _append_log_path(msg: str, include_log_path: bool) -> str:
"""Helper function to append log file path to error/warning messages."""
if include_log_path and get_log_file_path():
return f"{msg}\nFor more details, check the log file at: {get_log_file_path()}"
return msg
def debug(msg: str, *args: Any, **kwargs: Any) -> None:
"""Log a DEBUG level message."""
get_logger().debug(msg, *args, **kwargs)
def info(msg: str, *args: Any, **kwargs: Any) -> None:
"""Log an INFO level message."""
get_logger().info(msg, *args, **kwargs)
def warning(msg: str, *args: Any, include_log_path: bool = False, **kwargs: Any) -> None:
"""Log a WARNING level message."""
get_logger().warning(_append_log_path(msg, include_log_path), *args, **kwargs)
def error(msg: str, *args: Any, include_log_path: bool = True, **kwargs: Any) -> None:
"""Log an ERROR level message with optional log file path."""
get_logger().error(_append_log_path(msg, include_log_path), *args, **kwargs)
def critical(msg: str, *args: Any, include_log_path: bool = True, **kwargs: Any) -> None:
"""Log a CRITICAL level message with optional log file path."""
get_logger().critical(_append_log_path(msg, include_log_path), *args, **kwargs)
def success(msg: str, *args: Any, **kwargs: Any) -> None:
"""Log a SUCCESS level message (custom level 25)."""
get_logger().log(SUCCESS, msg, *args, **kwargs)
# Add success method to Logger class properly
def _success_method(self: logging.Logger, msg: str, *args: Any, **kwargs: Any) -> None:
"""Custom success logging method for Logger instances."""
if self.isEnabledFor(SUCCESS):
self.log(SUCCESS, msg, *args, **kwargs)
logging.Logger.success = _success_method # type: ignore[attr-defined]
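
# Example: once this module is imported, any Logger instance gains .success();
# a quick illustration (the message text is illustrative):
#
#     get_logger().success("De-identification finished")  # logged at level 25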
# Verbose Logging Utilities (for detailed debugging across all steps)
# ====================================================================
class VerboseLogger:
    """Centralized verbose logging for detailed output in DEBUG mode.

    Provides formatted tree-view output for file processing, step execution,
    and operation timing. Only logs when the logger is in DEBUG mode.

    Usage::

        vlog = VerboseLogger(log)
        with vlog.file_processing("file.xlsx", total_records=412):
            with vlog.step("Processing step"):
                vlog.detail("Details here")
    """
    def __init__(self, logger_module: Any) -> None:
        """Initialize with the logger module (anything exposing a ``debug`` method)."""
        self.log = logger_module
        self._indent = 0

    def _is_verbose(self) -> bool:
        """Check if verbose (DEBUG) logging is enabled."""
        return get_logger().isEnabledFor(logging.DEBUG)
def _log_tree(self, prefix: str, message: str) -> None:
"""Log with tree-view formatting."""
if self._is_verbose():
indent = " " * self._indent
self.log.debug(f"{indent}{prefix}{message}")
    class _ContextManager:
        """Context manager for tree-view logging blocks."""

        def __init__(self, vlog: 'VerboseLogger', prefix: str, header: str,
                     footer: Optional[str] = None):
            self.vlog = vlog
            self.prefix = prefix
            self.header = header
            self.footer = footer

        def __enter__(self) -> 'VerboseLogger._ContextManager':
            self.vlog._log_tree(self.prefix, self.header)
            self.vlog._indent += 1
            return self

        def __exit__(self, *args: Any) -> None:
            self.vlog._indent -= 1
            if self.footer:
                # Use └─ for the footer instead of ├─ to mark the final item
                self.vlog._log_tree("└─ ", self.footer)
    def file_processing(self, filename: str, total_records: Optional[int] = None):
"""Context manager for processing a file."""
header = f"Processing: {filename}"
if total_records is not None:
header += f" ({total_records} records)"
return self._ContextManager(self, "├─ ", header, "✓ Complete")
def step(self, step_name: str):
"""Context manager for a processing step."""
return self._ContextManager(self, "├─ ", step_name)
def detail(self, message: str) -> None:
"""Log a detail message within a step."""
self._log_tree("│ ", message)
def metric(self, label: str, value: Any) -> None:
"""Log a metric/statistic."""
self._log_tree("├─ ", f"{label}: {value}")
def timing(self, operation: str, seconds: float) -> None:
"""Log operation timing."""
self._log_tree("├─ ", f"⏱ {operation}: {seconds:.2f}s")
def items_list(self, label: str, items: list, max_show: int = 5) -> None:
"""Log a list of items with truncation if too long."""
if not self._is_verbose():
return
if len(items) <= max_show:
self.detail(f"{label}: {', '.join(str(i) for i in items)}")
else:
self.detail(f"{label}: {', '.join(str(i) for i in items[:max_show])} ... (+{len(items)-max_show} more)")
# Create a global VerboseLogger instance for use across modules
_verbose_logger: Optional[VerboseLogger] = None
def get_verbose_logger() -> VerboseLogger:
"""Get or create the global VerboseLogger instance."""
global _verbose_logger
if _verbose_logger is None:
_verbose_logger = VerboseLogger(sys.modules[__name__])
return _verbose_logger
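

# A minimal smoke-test sketch (not part of the pipeline): running this module
# directly exercises the console/file routing and the tree-view output.
if __name__ == "__main__":
    setup_logger(log_level=logging.DEBUG)
    info("This line goes to the log file only")
    success("This line appears on the console and in the log file")

    vlog = get_verbose_logger()
    with vlog.file_processing("example.xlsx", total_records=3):
        vlog.metric("Total rows", 3)
        with vlog.step("Loading data"):
            vlog.detail("Reading sheet 1...")
            vlog.timing("Load time", 0.01)
    print(f"Log written to: {get_log_file_path()}")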