深入浅出 Python Logging:从基础到进阶日志管理

发布于:2025-02-15 ⋅ 阅读:(22) ⋅ 点赞:(0)

在这里插入图片描述
在 Python 开发过程中,日志(Logging)是不可或缺的调试和监控工具。合理的日志管理不仅能帮助开发者快速定位问题,还能提供丰富的数据支持,让应用更具可观测性。本文将带你全面了解 Python logging 模块,涵盖日志输出到控制台、文件、Elasticsearch(ES),以及日志格式的介绍。

一、日志输出到控制台

最简单的日志管理方式就是将日志信息输出到控制台(标准输出)。这种方式在调试时非常方便,能够实时查看程序运行状态。默认情况下,Python logging 模块会以 WARNING 级别及以上的日志输出到控制台。

  def add_stream_handle(self):
        """Attach a stdout StreamHandler (level CONSOLE_LEVEL) when console logging is enabled."""
        if LOG_TO_CONSOLE and LOG_ENABLED:
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(level=CONSOLE_LEVEL)
            handler.setFormatter(logging.Formatter(LOG_FORMAT))
            self.logger.addHandler(handler)

二、日志输出到文件

在实际开发中,日志不仅仅是为了调试,更重要的是长期存储以便后续分析。因此,我们通常需要将日志写入文件,以便在程序运行后仍能追踪问题。通过 FileHandler,我们可以轻松地将日志写入指定的文件,并支持日志轮转(如按日期或文件大小分割)。

    def add_file_handle(self):
        """Attach a FileHandler writing to LOG_DIR/logs/<start-timestamp>.log when file logging is enabled."""
        if LOG_ENABLED and LOG_TO_FILE:
            logs_dir = os.path.join(LOG_DIR, "logs")
            os.makedirs(logs_dir, exist_ok=True)
            # File name is fixed at handler-creation time, so one file per process run.
            file = os.path.join(logs_dir, f'{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
            handler = logging.FileHandler(file, encoding="utf-8")
            handler.setLevel(level=FILE_LEVEL)
            handler.setFormatter(logging.Formatter(LOG_FORMAT))
            self.logger.addHandler(handler)

三、日志输出到 Elasticsearch(ES)

对于需要集中式日志管理的场景,可以将日志直接存储到 Elasticsearch 中,配合 Kibana 进行可视化分析。通过 logging 模块的自定义 Handler 或者 elastic-apm 等第三方库,我们可以将 Python 产生的日志数据直接推送到 ES,并结合全文检索和仪表盘进行日志分析。

class JsonSerializer:
    """JSON serializer relying on the standard library json module."""

    mimetype: ClassVar[str] = "application/json"

    def default(self, data: Any) -> Any:
        """Fallback converter for objects json cannot serialize natively.

        Supports date/datetime (ISO-8601 string), UUID (string) and
        Decimal (float).
        """
        if isinstance(data, date):
            return data.isoformat()
        elif isinstance(data, uuid.UUID):
            return str(data)
        elif isinstance(data, Decimal):
            return float(data)
        # json.dumps expects its ``default`` hook to raise TypeError (not a
        # bare Exception) for unserializable objects.
        raise TypeError(f"Unable to serialize to JSON: {data!r} (type: {type(data).__name__})")

    def json_dumps(self, data: dict) -> str:
        """Serialize *data* to a compact JSON string, keeping non-ASCII as-is."""
        return json.dumps(
            data, default=self.default, ensure_ascii=False, separators=(",", ":")
        )

    def json_loads(self, data: str) -> dict:
        """Parse a JSON string into Python objects."""
        return json.loads(data)

    def loads(self, data: str) -> dict:
        """Alias for :meth:`json_loads`."""
        return self.json_loads(data)

    def dumps(self, data: dict) -> str:
        """Serialize *data*; a str is assumed pre-serialized and forwarded unchanged."""
        if isinstance(data, str):
            return data
        return self.json_dumps(data)


def _attempt_serialize_numpy(data: Any) -> Tuple[bool, Any]:
    try:
        import numpy as np

        if isinstance(
                data,
                (
                        np.int_,
                        np.intc,
                        np.int8,
                        np.int16,
                        np.int32,
                        np.int64,
                        np.uint8,
                        np.uint16,
                        np.uint32,
                        np.uint64,
                ),
        ):
            return True, int(data)
        elif isinstance(
                data,
                (
                        np.float16,
                        np.float32,
                        np.float64,
                ),
        ):
            return True, float(data)
        elif isinstance(data, np.bool_):
            return True, bool(data)
        elif isinstance(data, np.datetime64):
            return True, data.item().isoformat()
        elif isinstance(data, np.ndarray):
            return True, data.tolist()

    except ImportError:
        # Since we failed to import 'numpy' we don't want to try again.
        return False, None

    return False, None


def _attempt_serialize_pandas(data: Any) -> Tuple[bool, Any]:
    try:
        import pandas as pd

        if isinstance(data, (pd.Series, pd.Categorical)):
            return True, data.tolist()
        elif isinstance(data, pd.Timestamp) and data is not getattr(pd, "NaT", None):
            return True, data.isoformat()
        elif data is getattr(pd, "NA", None):
            return True, None

    except ImportError:
        # Since we failed to import 'pandas' we don't want to try again.
        return False, None

    return False, None


def _attempt_serialize_numpy_or_pandas(data: Any) -> Tuple[bool, Any]:
    """Run the numpy converter, then the pandas one, returning the first hit."""
    for converter in (_attempt_serialize_numpy, _attempt_serialize_pandas):
        handled, converted = converter(data)
        if handled:
            return handled, converted
    return False, None


# Type groups consumed by EsJsonSerializer.default(); module-level tuples so
# they can be extended without subclassing.
TIME_TYPES = (date, datetime)  # serialized via isoformat()
FLOAT_TYPES = (Decimal,)  # serialized via float()
INTEGER_TYPES = ()  # empty by default; kept for backwards compatibility


class EsJsonSerializer(JsonSerializer):
    """JSON serializer tuned for Elasticsearch documents."""

    mimetype: ClassVar[str] = "application/json"

    def default(self, data: Any) -> Any:
        """Convert *data* to a JSON-compatible value or raise TypeError."""
        if isinstance(data, TIME_TYPES):
            # Avoid importing pandas just to detect pd.NaT: its isoformat()
            # is the string "NaT", which is not a valid Elasticsearch date,
            # so in that case fall through to the remaining checks.
            iso = data.isoformat()
            if iso != "NaT":
                return iso

        if isinstance(data, uuid.UUID):
            return str(data)
        if isinstance(data, FLOAT_TYPES):
            return float(data)

        # Kept for backwards compatibility even though 'INTEGER_TYPES' is
        # empty by default.
        if INTEGER_TYPES and isinstance(data, INTEGER_TYPES):
            return int(data)

        # numpy / pandas are expensive to import, so they are tried last.
        handled, converted = _attempt_serialize_numpy_or_pandas(data)
        if handled:
            return converted

        raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")


class EsHandler(logging.Handler):
    """Buffering logging handler that bulk-indexes records into Elasticsearch.

    Records accumulate in an in-memory buffer and are flushed either when the
    buffer reaches *batch_size* entries or *flush_interval* seconds after the
    first buffered record, whichever comes first.
    """

    def __init__(self, url, index, batch_size=100, flush_interval=6, extra_fields=None):
        """
        :param url: full URL of the Elasticsearch ``_bulk`` endpoint
        :param index: target index name
        :param batch_size: flush as soon as this many records are buffered
        :param flush_interval: max seconds a record may sit in the buffer
        :param extra_fields: dict merged into every record (non-dict values are ignored)
        """
        super().__init__()
        self.default_raise = False  # when True, re-raise bulk-upload errors
        self.index = index
        self.batch_size = batch_size
        self.flush_interval = flush_interval
        self.extra_fields = extra_fields if isinstance(extra_fields, dict) else {}
        self._buffer = []
        self.url = url
        self._timer = None
        self.serializer = EsJsonSerializer()

    @staticmethod
    def __get_es_datetime_str(timestamp):
        """Format a POSIX timestamp as an ES-compatible UTC string with ms precision."""
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; build
        # an aware UTC datetime instead (identical wall-clock value).
        from datetime import timezone
        current_date = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        return "{0!s}.{1:03d}Z".format(current_date.strftime('%Y-%m-%dT%H:%M:%S'), int(current_date.microsecond / 1000))

    def emit(self, record: logging.LogRecord) -> None:
        """Buffer one record; flush if the batch is full, else arm the timer."""
        self.format(record)
        rec = self.extra_fields.copy()
        # None values are replaced with "" so every field stays JSON-friendly.
        for key, value in record.__dict__.items():
            rec[key] = "" if value is None else value
        rec["timestamp"] = self.__get_es_datetime_str(record.created)

        self._buffer.append(rec)
        # Flush immediately once the batch is full; otherwise make sure a
        # timer is pending so the record is not held indefinitely.
        if len(self._buffer) >= self.batch_size:
            self.flush()
        else:
            self.schedule_flush()

    def schedule_flush(self):
        """Arm the delayed-flush timer if one is not already pending."""
        if self._timer is None:
            self._timer = Timer(self.flush_interval, self.flush)
            # Daemon timer so a pending flush never blocks interpreter exit.
            self._timer.daemon = True
            self._timer.start()

    def flush(self):
        """Cancel any pending timer and bulk-write the buffered records."""
        if self._timer is not None and self._timer.is_alive():
            self._timer.cancel()
        self._timer = None
        # Guard the buffer swap with the handler lock (an RLock created by
        # logging.Handler), since the Timer thread may race with emit().
        self.acquire()
        try:
            logs_buffer = self._buffer
            self._buffer = []
        finally:
            self.release()
        if logs_buffer:
            try:
                self.bulk_data(logs_buffer)
            except Exception as exception:
                # Best-effort by default: logging must not crash the app.
                if self.default_raise:
                    raise exception

    def bulk_data(self, logs_buffer):
        """POST the buffered records to the Elasticsearch _bulk endpoint (NDJSON)."""
        print("批量写入 Elasticsearch")
        request_body = ""
        for log_record in logs_buffer:
            # Each document is an action line followed by its source line.
            actions = {"index": {'_index': self.index, "_id": str(uuid4())}}
            data = json.dumps(actions)
            request_body += data
            request_body += "\n"
            # Reuse the handler's serializer instead of building one per record.
            request_body += self.serializer.json_dumps(log_record)
            request_body += "\n"
        # NOTE: the original appended one extra "\n" here, leaving a blank
        # line at the end of the NDJSON body; the bulk body must simply end
        # with a single newline.
        headers = {'content-type': 'application/json'}
        requests.post(self.url, data=request_body, headers=headers)

    def close(self):
        """Flush remaining records, then let logging tear the handler down."""
        if self._timer is not None:
            self.flush()
        self._timer = None
        # The original skipped super().close(); without it the handler is
        # never removed from logging's internal handler list at shutdown.
        super().close()

四、日志格式介绍

字段 作用
%(levelname)s 日志级别(如 DEBUG、INFO、WARNING、ERROR、CRITICAL),用于标识日志的严重程度。
%(asctime)s 日志的时间戳,默认格式为 YYYY-MM-DD HH:MM:SS,mmm(可自定义 datefmt)。
%(process)d 进程 ID,表示当前日志属于哪个进程(适用于多进程应用)。
%(filename)s 记录日志的 Python 文件名(不包含路径)。
%(name)s 记录日志的 Logger 名称,通常是 logging.getLogger(name) 设定的名称。
%(lineno)d 代码中的行号,表示日志记录发生的确切位置。
%(module)s 模块名称,即 filename 去掉 .py 的部分。
%(message)s 实际的日志信息,即 logger.info(“内容”) 传入的内容。
%(levelno)s 日志级别的数值(如 DEBUG=10, INFO=20, WARNING=30, ERROR=40, CRITICAL=50)
%(pathname)s 记录日志的 Python 文件完整路径。
%(funcName)s 记录日志所在的函数名。
%(thread)d 线程 ID(适用于多线程应用)。
%(threadName)s 线程名称。
%(processName)s 进程名称(适用于多进程应用)。
%(msecs)d 记录日志的时间戳的毫秒部分。
# 示例
LOG_FORMAT = '%(levelname)s - %(asctime)s - [PID: %(process)d] [Thread: %(threadName)s] - %(pathname)s - %(funcName)s - Line: %(lineno)d - %(message)s'

五、完整的代码

import logging
import sys
import os
import json
import socket
import time
import uuid
from datetime import date, datetime
from decimal import Decimal
from typing import Any, ClassVar, Mapping, Optional, Tuple

from threading import Timer
from multiprocessing import Lock
import requests
from uuid import uuid4

LOG_ENABLED = True  # master switch: enable logging at all
LOG_TO_CONSOLE = True  # emit records to the console
CONSOLE_LEVEL = "INFO"  # minimum level for the console handler
LOG_TO_FILE = True  # emit records to a file
FILE_LEVEL = "INFO"  # minimum level for the file handler
LOG_TO_ES = True  # emit records to Elasticsearch
ES_LEVEL = "INFO"  # minimum level for the Elasticsearch handler

LOG_LEVEL = 'DEBUG'  # level of the logger itself (handlers filter further)

LOG_FORMAT = '%(levelname)s - %(asctime)s - process: %(process)d - %(filename)s - %(name)s - %(lineno)d - %(module)s - %(message)s'  # format applied to every record

ELASTIC_SEARCH_HOST = 'http://192.168.3.200:9200'  # Elasticsearch Host
ELASTIC_SEARCH_INDEX = 'python'  # Elasticsearch Index Name
APP_NAME = "demo-fastapi"
APP_ENVIRONMENT = 'dev'  # runtime environment, e.g. test vs production
LOG_DIR = os.getcwd()  # base directory for the "logs" subfolder


class JsonSerializer:
    """JSON serializer relying on the standard library json module."""

    mimetype: ClassVar[str] = "application/json"

    def default(self, data: Any) -> Any:
        """Fallback converter for objects json cannot serialize natively.

        Supports date/datetime (ISO-8601 string), UUID (string) and
        Decimal (float).
        """
        if isinstance(data, date):
            return data.isoformat()
        elif isinstance(data, uuid.UUID):
            return str(data)
        elif isinstance(data, Decimal):
            return float(data)
        # json.dumps expects its ``default`` hook to raise TypeError (not a
        # bare Exception) for unserializable objects.
        raise TypeError(f"Unable to serialize to JSON: {data!r} (type: {type(data).__name__})")

    def json_dumps(self, data: dict) -> str:
        """Serialize *data* to a compact JSON string, keeping non-ASCII as-is."""
        return json.dumps(
            data, default=self.default, ensure_ascii=False, separators=(",", ":")
        )

    def json_loads(self, data: str) -> dict:
        """Parse a JSON string into Python objects."""
        return json.loads(data)

    def loads(self, data: str) -> dict:
        """Alias for :meth:`json_loads`."""
        return self.json_loads(data)

    def dumps(self, data: dict) -> str:
        """Serialize *data*; a str is assumed pre-serialized and forwarded unchanged."""
        if isinstance(data, str):
            return data
        return self.json_dumps(data)


def _attempt_serialize_numpy(data: Any) -> Tuple[bool, Any]:
    try:
        import numpy as np

        if isinstance(
                data,
                (
                        np.int_,
                        np.intc,
                        np.int8,
                        np.int16,
                        np.int32,
                        np.int64,
                        np.uint8,
                        np.uint16,
                        np.uint32,
                        np.uint64,
                ),
        ):
            return True, int(data)
        elif isinstance(
                data,
                (
                        np.float16,
                        np.float32,
                        np.float64,
                ),
        ):
            return True, float(data)
        elif isinstance(data, np.bool_):
            return True, bool(data)
        elif isinstance(data, np.datetime64):
            return True, data.item().isoformat()
        elif isinstance(data, np.ndarray):
            return True, data.tolist()

    except ImportError:
        # Since we failed to import 'numpy' we don't want to try again.
        return False, None

    return False, None


def _attempt_serialize_pandas(data: Any) -> Tuple[bool, Any]:
    try:
        import pandas as pd

        if isinstance(data, (pd.Series, pd.Categorical)):
            return True, data.tolist()
        elif isinstance(data, pd.Timestamp) and data is not getattr(pd, "NaT", None):
            return True, data.isoformat()
        elif data is getattr(pd, "NA", None):
            return True, None

    except ImportError:
        # Since we failed to import 'pandas' we don't want to try again.
        return False, None

    return False, None


def _attempt_serialize_numpy_or_pandas(data: Any) -> Tuple[bool, Any]:
    """Run the numpy converter, then the pandas one, returning the first hit."""
    for converter in (_attempt_serialize_numpy, _attempt_serialize_pandas):
        handled, converted = converter(data)
        if handled:
            return handled, converted
    return False, None


# Type groups consumed by EsJsonSerializer.default(); module-level tuples so
# they can be extended without subclassing.
TIME_TYPES = (date, datetime)  # serialized via isoformat()
FLOAT_TYPES = (Decimal,)  # serialized via float()
INTEGER_TYPES = ()  # empty by default; kept for backwards compatibility


class EsJsonSerializer(JsonSerializer):
    """JSON serializer tuned for Elasticsearch documents."""

    mimetype: ClassVar[str] = "application/json"

    def default(self, data: Any) -> Any:
        """Convert *data* to a JSON-compatible value or raise TypeError."""
        if isinstance(data, TIME_TYPES):
            # Avoid importing pandas just to detect pd.NaT: its isoformat()
            # is the string "NaT", which is not a valid Elasticsearch date,
            # so in that case fall through to the remaining checks.
            iso = data.isoformat()
            if iso != "NaT":
                return iso

        if isinstance(data, uuid.UUID):
            return str(data)
        if isinstance(data, FLOAT_TYPES):
            return float(data)

        # Kept for backwards compatibility even though 'INTEGER_TYPES' is
        # empty by default.
        if INTEGER_TYPES and isinstance(data, INTEGER_TYPES):
            return int(data)

        # numpy / pandas are expensive to import, so they are tried last.
        handled, converted = _attempt_serialize_numpy_or_pandas(data)
        if handled:
            return converted

        raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")


class EsHandler(logging.Handler):
    """Buffering logging handler that bulk-indexes records into Elasticsearch.

    Records accumulate in an in-memory buffer and are flushed either when the
    buffer reaches *batch_size* entries or *flush_interval* seconds after the
    first buffered record, whichever comes first.
    """

    def __init__(self, url, index, batch_size=100, flush_interval=6, extra_fields=None):
        """
        :param url: full URL of the Elasticsearch ``_bulk`` endpoint
        :param index: target index name
        :param batch_size: flush as soon as this many records are buffered
        :param flush_interval: max seconds a record may sit in the buffer
        :param extra_fields: dict merged into every record (non-dict values are ignored)
        """
        super().__init__()
        self.default_raise = False  # when True, re-raise bulk-upload errors
        self.index = index
        self.batch_size = batch_size
        self.flush_interval = flush_interval
        self.extra_fields = extra_fields if isinstance(extra_fields, dict) else {}
        self._buffer = []
        self.url = url
        self._timer = None
        self.serializer = EsJsonSerializer()

    @staticmethod
    def __get_es_datetime_str(timestamp):
        """Format a POSIX timestamp as an ES-compatible UTC string with ms precision."""
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; build
        # an aware UTC datetime instead (identical wall-clock value).
        from datetime import timezone
        current_date = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        return "{0!s}.{1:03d}Z".format(current_date.strftime('%Y-%m-%dT%H:%M:%S'), int(current_date.microsecond / 1000))

    def emit(self, record: logging.LogRecord) -> None:
        """Buffer one record; flush if the batch is full, else arm the timer."""
        self.format(record)
        rec = self.extra_fields.copy()
        # None values are replaced with "" so every field stays JSON-friendly.
        for key, value in record.__dict__.items():
            rec[key] = "" if value is None else value
        rec["timestamp"] = self.__get_es_datetime_str(record.created)

        self._buffer.append(rec)
        # Flush immediately once the batch is full; otherwise make sure a
        # timer is pending so the record is not held indefinitely.
        if len(self._buffer) >= self.batch_size:
            self.flush()
        else:
            self.schedule_flush()

    def schedule_flush(self):
        """Arm the delayed-flush timer if one is not already pending."""
        if self._timer is None:
            self._timer = Timer(self.flush_interval, self.flush)
            # Daemon timer so a pending flush never blocks interpreter exit.
            self._timer.daemon = True
            self._timer.start()

    def flush(self):
        """Cancel any pending timer and bulk-write the buffered records."""
        if self._timer is not None and self._timer.is_alive():
            self._timer.cancel()
        self._timer = None
        # Guard the buffer swap with the handler lock (an RLock created by
        # logging.Handler), since the Timer thread may race with emit().
        self.acquire()
        try:
            logs_buffer = self._buffer
            self._buffer = []
        finally:
            self.release()
        if logs_buffer:
            try:
                self.bulk_data(logs_buffer)
            except Exception as exception:
                # Best-effort by default: logging must not crash the app.
                if self.default_raise:
                    raise exception

    def bulk_data(self, logs_buffer):
        """POST the buffered records to the Elasticsearch _bulk endpoint (NDJSON)."""
        print("批量写入 Elasticsearch")
        request_body = ""
        for log_record in logs_buffer:
            # Each document is an action line followed by its source line.
            actions = {"index": {'_index': self.index, "_id": str(uuid4())}}
            data = json.dumps(actions)
            request_body += data
            request_body += "\n"
            # Reuse the handler's serializer instead of building one per record.
            request_body += self.serializer.json_dumps(log_record)
            request_body += "\n"
        # NOTE: the original appended one extra "\n" here, leaving a blank
        # line at the end of the NDJSON body; the bulk body must simply end
        # with a single newline.
        headers = {'content-type': 'application/json'}
        requests.post(self.url, data=request_body, headers=headers)

    def close(self):
        """Flush remaining records, then let logging tear the handler down."""
        if self._timer is not None:
            self.flush()
        self._timer = None
        # The original skipped super().close(); without it the handler is
        # never removed from logging's internal handler list at shutdown.
        super().close()


class Logger:
    """Process-wide singleton that assembles the configured log handlers."""

    _instance = None
    _lock = Lock()

    def __init__(self):
        # NOTE: the original called logging.getLogger("__file__") — the
        # literal string "__file__", not the dunder variable — which is
        # clearly a typo; use __name__ for a conventional logger name.
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(LOG_LEVEL)
        self.add_stream_handle()
        self.add_file_handle()
        self.add_es_handle()

    def add_stream_handle(self):
        """Attach a stdout StreamHandler when console logging is enabled."""
        if LOG_TO_CONSOLE and LOG_ENABLED:
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(level=CONSOLE_LEVEL)
            handler.setFormatter(logging.Formatter(LOG_FORMAT))
            self.logger.addHandler(handler)

    def add_file_handle(self):
        """Attach a timestamped FileHandler under LOG_DIR/logs when enabled."""
        if LOG_ENABLED and LOG_TO_FILE:
            logs_dir = os.path.join(LOG_DIR, "logs")
            os.makedirs(logs_dir, exist_ok=True)
            file = os.path.join(logs_dir, f'{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
            handler = logging.FileHandler(file, encoding="utf-8")
            handler.setLevel(level=FILE_LEVEL)
            handler.setFormatter(logging.Formatter(LOG_FORMAT))
            self.logger.addHandler(handler)

    def add_es_handle(self):
        """Attach the Elasticsearch bulk handler when enabled."""
        if LOG_ENABLED and LOG_TO_ES:
            url = f"{ELASTIC_SEARCH_HOST}/_bulk"
            extra_fields = {"app_name": APP_NAME, "app_env": APP_ENVIRONMENT, 'host': socket.gethostname(),
                            'host_ip': socket.gethostbyname(socket.gethostname())}
            es_handler = EsHandler(url, index=ELASTIC_SEARCH_INDEX, extra_fields=extra_fields)
            es_handler.setLevel(level=LOG_LEVEL)
            formatter = logging.Formatter(LOG_FORMAT)
            es_handler.setFormatter(formatter)
            self.logger.addHandler(es_handler)

    @classmethod
    def get_logger(cls):
        """Return the shared logger, creating it on first use (thread-safe)."""
        # 'with' guarantees the lock is released even if Logger() raises;
        # the original acquire()/release() pair would stay locked forever
        # after an exception, deadlocking every later call.
        with cls._lock:
            if cls._instance is None:
                cls._instance = Logger().logger
        return cls._instance


if __name__ == '__main__':
    # Demo: emit one record at each level, then keep the process alive long
    # enough for the ES timer-based flush to fire.
    logger = Logger.get_logger()
    logger.info("INFO")
    logger.warning("WARNING")
    logger.error("ERROR")
    time.sleep(100)

结语

掌握 Python 的 logging 模块,能让你的日志管理更加高效,无论是简单的控制台调试,还是日志文件持久化存储,甚至是对接 ELK 进行集中管理,都是开发者必备的技能。