From 34caa5c5a3c1ece466dc4ddd0a07ed4f7805debc Mon Sep 17 00:00:00 2001 From: Xevion Date: Mon, 23 Jun 2025 17:43:21 -0500 Subject: [PATCH] feat(linux): implement systemd watchdog, add status updates to heartbeat, use SdNotify daemon constants - Log unhandled, unexpected signals --- build/unix/HATray.service | 1 + internal/service/linux.go | 41 +++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/build/unix/HATray.service b/build/unix/HATray.service index 214631e..7c30ff4 100644 --- a/build/unix/HATray.service +++ b/build/unix/HATray.service @@ -9,6 +9,7 @@ Type=notify NotifyAccess=main ExecStart=$BINARY_PATH ExecReload=/bin/kill -HUP $MAINPID +WatchdogSec=10 Restart=on-failure RestartSec=5 diff --git a/internal/service/linux.go b/internal/service/linux.go index e01e7a0..761b754 100644 --- a/internal/service/linux.go +++ b/internal/service/linux.go @@ -3,6 +3,7 @@ package service import ( + "fmt" "log/slog" "os" "os/signal" @@ -34,18 +35,30 @@ func NewService(logger *slog.Logger) Service { // Run implements the Service interface for Linux func (s *linuxService) Run() error { - s.logger.Info("starting service") + startTime := time.Now() + s.logger.Info("starting service", "start_time", startTime.Format(time.RFC3339)) // Notify systemd that we are starting - daemon.SdNotify(false, "STATUS=Starting HATray...\n") + daemon.SdNotify(false, "STATUS=starting\n") // Setup signal handling for systemd sigs := make(chan os.Signal, 1) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) + // Setup watchdog to systemd + var watchdog *time.Ticker + if watchdogUSec, err := daemon.SdWatchdogEnabled(false); err == nil && watchdogUSec > 0 { + watchdog = time.NewTicker(watchdogUSec / 2) + } + defer func() { + if watchdog != nil { + watchdog.Stop() + } + }() + // Setup heartbeat to systemd - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() + heartbeat := time.NewTicker(2 * time.Second) + defer 
heartbeat.Stop() // Start the service (backgrounded so that the service can still respond to systemd signals, the app layer is still designed for concurrency) go func() { @@ -56,21 +69,23 @@ func (s *linuxService) Run() error { } // Notify systemd that we are ready (and running) - daemon.SdNotify(false, "READY=1") - daemon.SdNotify(false, "STATUS=HATray running\n") + daemon.SdNotify(false, daemon.SdNotifyReady) + daemon.SdNotify(false, fmt.Sprintf("STATUS=running for %s\n", time.Since(startTime).String())) }() for { select { - case <-ticker.C: - daemon.SdNotify(false, "WATCHDOG=1") - s.logger.Debug("heartbeat") // TODO: add more detailed status information here + // NOTE(review): watchdog stays nil when the systemd watchdog is not enabled, and evaluating watchdog.C on a nil *time.Ticker panics; this case needs a nil guard (e.g. a nil-able channel variable) + case <-watchdog.C: + daemon.SdNotify(false, daemon.SdNotifyWatchdog) + case <-heartbeat.C: + daemon.SdNotify(false, fmt.Sprintf("STATUS=running for %s\n", time.Since(startTime).String())) case sig := <-sigs: s.logger.Info("signal received", "signal", sig) switch sig { case syscall.SIGINT, syscall.SIGTERM: - daemon.SdNotify(false, "STOPPING=1") + daemon.SdNotify(false, daemon.SdNotifyStopping) s.logger.Info("stopping service") if err := s.app.Pause(); err != nil { @@ -80,13 +95,15 @@ func (s *linuxService) Run() error { return nil // exit the service case syscall.SIGHUP: s.logger.Info("reloading service") - daemon.SdNotify(false, "RELOADING=1") + daemon.SdNotify(false, daemon.SdNotifyReloading) if err := s.app.Reload(); err != nil { s.logger.Error("failed to reload app layer", "error", err) } - daemon.SdNotify(false, "READY=1") + daemon.SdNotify(false, daemon.SdNotifyReady) + default: + s.logger.Warn("unhandled signal", "signal", sig) } } }