Mirror of https://github.com/Xevion/linkpulse.git (synced 2025-12-14 02:12:07 -06:00)
Add ASGI Request-Id correlation, add structlog LoggingMiddleware, overhaul all logging
- minor formatting details, type fixes.
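
At a glance, the pieces introduced below wire together as in this sketch (assembled from the diff that follows; illustrative only, not an excerpt of any one file):

# Illustrative sketch, not a verbatim excerpt: setup_logging() configures structlog
# and stdlib logging once, CorrelationIdMiddleware assigns a request id, and
# LoggingMiddleware emits one structured access-log entry per request.
from asgi_correlation_id import CorrelationIdMiddleware
from fastapi import FastAPI

from linkpulse.logging import setup_logging
from linkpulse.middleware import LoggingMiddleware

setup_logging(json_logs=False, log_level="DEBUG")

app = FastAPI()
app.add_middleware(LoggingMiddleware)
app.add_middleware(CorrelationIdMiddleware)  # added last, so it runs first and sets the request id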
@@ -1,18 +1,24 @@
 import sys
 import structlog
 
-import linkpulse.logging
 
 logger = structlog.get_logger()
 
 
 def main(*args):
     if args[0] == "serve":
-        import asyncio
-        from linkpulse.app import app
+        from linkpulse.logging import setup_logging
         from uvicorn import run
 
+        setup_logging()
 
-        logger.debug('Invoking uvicorn.run')
-        run('linkpulse.app:app', reload=True, host='0.0.0.0', access_log=True)
+        logger.debug("Invoking uvicorn.run")
+        run(
+            "linkpulse.app:app",
+            reload=True,
+            host="0.0.0.0"
+        )
 
     elif args[0] == "migrate":
         from linkpulse.migrate import main
@@ -27,7 +33,8 @@ def main(*args):
         from linkpulse.models import BaseModel, IPAddress
+
         # start REPL
-        from bpython import embed
+        from bpython import embed  # type: ignore
 
         embed(locals())
     else:
         print("Invalid command: {}".format(args[0]))
@@ -1,13 +1,14 @@
 import logging
 import os
 import random
 
 from collections import defaultdict
 from contextlib import asynccontextmanager
 from dataclasses import dataclass, field
 from datetime import datetime
 from typing import AsyncIterator
 
-import structlog
+from asgi_correlation_id import CorrelationIdMiddleware
 import human_readable
 from apscheduler.schedulers.background import BackgroundScheduler  # type: ignore
 from apscheduler.triggers.interval import IntervalTrigger  # type: ignore
@@ -17,11 +18,13 @@ from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
 from linkpulse.utilities import get_ip, hide_ip, pluralize
+from linkpulse.middleware import LoggingMiddleware
 from peewee import PostgresqlDatabase
 from psycopg2.extras import execute_values
 
-if not structlog.is_configured():
-    import linkpulse.logging
+from linkpulse.logging import setup_logging
+
+setup_logging(json_logs=False, log_level="DEBUG")
 
 load_dotenv(dotenv_path=".env")
 
@@ -30,8 +33,7 @@ from linkpulse import models, responses  # type: ignore
 # global variables
 is_development = os.getenv("ENVIRONMENT") == "development"
 db: PostgresqlDatabase = models.BaseModel._meta.database  # type: ignore
-
-logger = structlog.get_logger(__name__)
+logger = logging.getLogger()
 
 
 def flush_ips():
@@ -86,15 +88,15 @@ async def lifespan(_: FastAPI) -> AsyncIterator[None]:
 
     # Delete all randomly generated IP addresses
     with db.atomic():
-        logging.info(
+        logger.info(
             "Deleting Randomized IP Addresses",
             {"ip_pool_count": len(app.state.ip_pool)},
         )
         query = models.IPAddress.delete().where(
            models.IPAddress.ip << app.state.ip_pool
        )
-        rowcount = query.execute()
-        logger.info("Randomized IP Addresses deleted", {"rowcount": rowcount})
+        row_count = query.execute()
+        logger.info("Randomized IP Addresses deleted", {"row_count": row_count})
 
    FastAPICache.init(
        backend=InMemoryBackend(), prefix="fastapi-cache", cache_status_header="X-Cache"
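
For reference, the `<<` operator in the peewee query above builds an SQL IN clause, and calling execute() on a DELETE returns the number of affected rows; that count is what the new log entry reports as row_count. A minimal sketch with hypothetical values:

# Sketch only (hypothetical addresses): `<<` translates to `WHERE ip IN (...)`,
# and .execute() returns the number of rows deleted, logged above as `row_count`.
from linkpulse import models

doomed_ips = ["203.0.113.7", "203.0.113.8"]
row_count = models.IPAddress.delete().where(models.IPAddress.ip << doomed_ips).execute()
print(row_count)  # e.g. 2 if both addresses existed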
@@ -126,19 +128,20 @@ app = FastAPI(lifespan=lifespan)
 if is_development:
     from fastapi.middleware.cors import CORSMiddleware
 
-    origins = [
-        "http://localhost",
-        "http://localhost:5173",
-    ]
-
     app.add_middleware(
         CORSMiddleware,
-        allow_origins=origins,
+        allow_origins=[
+            "http://localhost",
+            "http://localhost:5173",
+        ],
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
     )
 
+app.add_middleware(LoggingMiddleware)
+app.add_middleware(CorrelationIdMiddleware)
+
 
 @app.get("/health")
 async def health():
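A note on ordering: Starlette's add_middleware() places the most recently registered middleware on the outside of the stack, so adding CorrelationIdMiddleware after LoggingMiddleware means the correlation id is already bound by the time LoggingMiddleware.dispatch calls correlation_id.get(). Spelled out explicitly, the two calls above are roughly equivalent to this (illustrative only; the real app also passes lifespan=lifespan):

# Illustrative only: the two add_middleware() calls above build this stack,
# with CorrelationIdMiddleware outermost.
from asgi_correlation_id import CorrelationIdMiddleware
from fastapi import FastAPI
from starlette.middleware import Middleware

from linkpulse.middleware import LoggingMiddleware

app = FastAPI(
    middleware=[
        Middleware(CorrelationIdMiddleware),  # runs first: reads or generates the request id
        Middleware(LoggingMiddleware),        # runs second: correlation_id.get() is already set
    ]
)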
@@ -3,59 +3,119 @@ import sys
 from typing import List
 
 import structlog
-from structlog.stdlib import ProcessorFormatter
-from structlog.types import Processor
+from structlog.types import EventDict, Processor
+from itertools import chain
 
-shared_processors: List[Processor] = [
-    structlog.stdlib.add_log_level,
-    structlog.processors.CallsiteParameterAdder(
-        {
-            structlog.processors.CallsiteParameter.MODULE,
-            structlog.processors.CallsiteParameter.FILENAME,
-            structlog.processors.CallsiteParameter.LINENO,
-        }
-    ),
-    structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S.%f"),
-]
 
-structlog_processors = shared_processors + []
-# Remove _record & _from_structlog.
-logging_processors: List[Processor] = [ProcessorFormatter.remove_processors_meta]
 
-if sys.stderr.isatty():
-    console_renderer = structlog.dev.ConsoleRenderer()
-    logging_processors.append(console_renderer)
-    structlog_processors.append(console_renderer)
-else:
-    json_renderer = structlog.processors.JSONRenderer(indent=1, sort_keys=True)
-    structlog_processors.append(json_renderer)
-    logging_processors.append(json_renderer)
 
-structlog.configure(
-    processors=structlog_processors,
-    wrapper_class=structlog.stdlib.BoundLogger,
-    # logger_factory=structlog.stdlib.LoggerFactory(),
-    logger_factory=structlog.PrintLoggerFactory(sys.stderr),
-    context_class=dict,
-    cache_logger_on_first_use=True,
-)
 
 
-formatter = ProcessorFormatter(
-    # These run ONLY on `logging` entries that do NOT originate within
-    # structlog.
-    foreign_pre_chain=shared_processors,
-    # These run on ALL entries after the pre_chain is done.
-    processors=logging_processors,
-)
+def rename_event_key(_, __, event_dict: EventDict) -> EventDict:
+    """
+    Renames the `event` key to `msg`, as Railway expects it in that form.
+    """
+    print(event_dict)
+    event_dict["msg"] = event_dict.pop("event")
+    return event_dict
 
-handler = logging.StreamHandler(sys.stderr)
-# Use OUR `ProcessorFormatter` to format all `logging` entries.
-handler.setFormatter(formatter)
-logging.basicConfig(handlers=[handler], level=logging.INFO)
 
-external_loggers = ["uvicorn.error", "uvicorn.access"]
-for logger_name in external_loggers:
-    logger = logging.getLogger(logger_name)
-    logger.handlers = [handler]
-    logger.propagate = False
+def drop_color_message_key(_, __, event_dict: EventDict) -> EventDict:
+    """
+    Uvicorn logs the message a second time in the extra `color_message`, but we don't
+    need it. This processor drops the key from the event dict if it exists.
+    """
+    event_dict.pop("color_message", None)
+    return event_dict
+
+
+def setup_logging(json_logs: bool = False, log_level: str = "INFO"):
+    def flatten(n):
+        match n:
+            case []: return []
+            case [[*hd], *tl]: return [*flatten(hd), *flatten(tl)]
+            case [hd, *tl]: return [hd, *flatten(tl)]
+
+    shared_processors: List[Processor] = flatten([
+        structlog.contextvars.merge_contextvars,
+        structlog.stdlib.add_logger_name,
+        structlog.stdlib.add_log_level,
+        structlog.stdlib.PositionalArgumentsFormatter(),
+        structlog.stdlib.ExtraAdder(),
+        drop_color_message_key,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+        (
+            [
+                rename_event_key,
+                # Format the exception only for JSON logs, as we want to pretty-print them when using the ConsoleRenderer
+                structlog.processors.format_exc_info,
+            ]
+            if json_logs
+            else []
+        ),
+    ])
+
+    structlog.configure(
+        processors=[
+            *shared_processors,
+            # Prepare event dict for `ProcessorFormatter`.
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    log_renderer: structlog.types.Processor
+    if json_logs:
+        log_renderer = structlog.processors.JSONRenderer()
+    else:
+        log_renderer = structlog.dev.ConsoleRenderer()
+
+    formatter = structlog.stdlib.ProcessorFormatter(
+        # These run ONLY on `logging` entries that do NOT originate within structlog.
+        foreign_pre_chain=shared_processors,
+        # These run on ALL entries after the pre_chain is done.
+        processors=[
+            # Remove _record & _from_structlog.
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            log_renderer,
+        ],
+    )
+
+    handler = logging.StreamHandler()
+    # Use OUR `ProcessorFormatter` to format all `logging` entries.
+    handler.setFormatter(formatter)
+    root_logger = logging.getLogger()
+    root_logger.addHandler(handler)
+    root_logger.setLevel(log_level.upper())
+
+    def configure_logger(name: str, clear: bool, propagate: bool) -> None:
+        logger = logging.getLogger(name)
+        if clear:
+            logger.handlers.clear()
+        logger.propagate = propagate
+
+    for _log in ["uvicorn", "uvicorn.error"]:
+        # Clear the log handlers for uvicorn loggers, and enable propagation
+        # so the messages are caught by our root logger and formatted correctly
+        # by structlog
+        configure_logger(_log, clear=True, propagate=True)
+
+    # Since we re-create the access logs ourselves, to add all information
+    # in the structured log (see the `logging_middleware` in main.py), we clear
+    # the handlers and prevent the logs to propagate to a logger higher up in the
+    # hierarchy (effectively rendering them silent).
+    configure_logger("uvicorn.access", clear=True, propagate=False)
+
+    def handle_exception(exc_type, exc_value, exc_traceback):
+        """
+        Log any uncaught exception instead of letting it be printed by Python
+        (but leave KeyboardInterrupt untouched to allow users to Ctrl+C to stop)
+        See https://stackoverflow.com/a/16993115/3641865
+        """
+        if issubclass(exc_type, KeyboardInterrupt):
+            sys.__excepthook__(exc_type, exc_value, exc_traceback)
+            return
+
+        root_logger.error(
+            "Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback)
+        )
+
+    sys.excepthook = handle_exception
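
The net effect of setup_logging() is that stdlib and structlog records share one rendering pipeline: foreign records go through foreign_pre_chain, structlog events are wrapped by wrap_for_formatter, and both end up in the same renderer on the root logger. A minimal sketch of that behavior (not part of the commit):

# Minimal sketch, not part of the commit: after setup_logging(), both of these
# lines are rendered by the same ProcessorFormatter attached to the root logger.
import logging

import structlog

from linkpulse.logging import setup_logging

setup_logging(json_logs=True, log_level="INFO")

logging.getLogger("somelib").info("plain stdlib record")          # via foreign_pre_chain
structlog.get_logger("api").info("structured event", user_id=42)  # via wrap_for_formatter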
backend/linkpulse/middleware.py (new file, +51 lines)
@@ -0,0 +1,51 @@
+import time
+from asgi_correlation_id import correlation_id
+import structlog
+
+from fastapi import FastAPI, Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware
+
+
+
+class LoggingMiddleware(BaseHTTPMiddleware):
+    def __init__(self, app: FastAPI):
+        super().__init__(app)
+        self.access_logger = structlog.get_logger("api.access")
+
+    async def dispatch(self, request: Request, call_next) -> Response:
+        structlog.contextvars.clear_contextvars()
+
+        # These context vars will be added to all log entries emitted during the request
+        request_id = correlation_id.get()
+        structlog.contextvars.bind_contextvars(request_id=request_id)
+
+        start_time = time.perf_counter_ns()
+        # If the call_next raises an error, we still want to return our own 500 response,
+        # so we can add headers to it (process time, request ID...)
+        response = Response(status_code=500)
+        try:
+            response = await call_next(request)
+        except Exception:
+            # TODO: Validate that we don't swallow exceptions (unit test?)
+            structlog.stdlib.get_logger("api.error").exception("Uncaught exception")
+            raise
+        finally:
+            process_time = time.perf_counter_ns() - start_time
+            self.access_logger.info(
+                "Request",
+                http={
+                    "url": str(request.url),
+                    "query": dict(request.query_params),
+                    "status_code": response.status_code,
+                    "method": request.method,
+                    "request_id": request_id,
+                    "version": request.scope["http_version"],
+                },
+                client={"ip": request.client.host, "port": request.client.port} if request.client else None,
+                duration=process_time,
+            )
+
+            # TODO: Is this being returned in production? We shouldn't be leaking this.
+            response.headers["X-Process-Time"] = str(process_time / 10 ** 9)
+
+            return response
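
A quick way to see the new stack in action (a sketch, not part of the commit; assumes the app can start, e.g. a reachable database for the lifespan hook): hit a route with the test client and inspect the response headers. X-Request-ID comes from CorrelationIdMiddleware (its default header name), and X-Process-Time is set in the dispatch method above.

# Sketch only: exercise /health and inspect the headers added by the middleware.
from fastapi.testclient import TestClient

from linkpulse.app import app

with TestClient(app) as client:
    response = client.get("/health")
    print(response.headers.get("X-Request-ID"))    # correlation id, generated if absent from the request
    print(response.headers.get("X-Process-Time"))  # request duration in seconds, from LoggingMiddleware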