diff --git a/modules/nixos/tools/telegraf/default.nix b/modules/nixos/tools/telegraf/default.nix
index 2cc55d3..f1a2cf6 100644
--- a/modules/nixos/tools/telegraf/default.nix
+++ b/modules/nixos/tools/telegraf/default.nix
@@ -28,11 +28,11 @@ let
   '';
   baseConfig = {
     agent = {
-      interval = "10s";
+      interval = cfg.interval;
       round_interval = true;
       metric_batch_size = 1000;
       metric_buffer_limit = 10000;
-      flush_interval = "10s";
+      flush_interval = cfg.interval;
       hostname = config.syscfg.hostname;
       omit_hostname = false;
     };
@@ -258,6 +258,15 @@ let
         }
       ];
     })
+    # Report the state of every systemd service unit.
+    (lib.mkIf (hasCollector "systemd_units") {
+      inputs.systemd_units = {
+        pattern = "*";
+        unittype = "service";
+        details = true;
+        timeout = "5s";
+      };
+    })
     (lib.mkIf (hasCollector "ping") {
       inputs.ping = {
         urls = [ "1.1.1.1" ];
@@ -278,12 +287,50 @@ let
         memory_saving_mode = true;
       };
     })
-    (lib.mkIf (hasCollector "gpu") {
-      inputs.exec = [{
-        commands = [ amdgpuMetricsScript ];
-        timeout = "5s";
-        data_format = "influx";
-      }];
+    # Both exec-based collectors append to the same inputs.exec list.
+    (lib.mkIf (hasCollector "gpu" || hasCollector "nix") {
+      inputs.exec =
+        lib.optionals (hasCollector "gpu") [{
+          commands = [ amdgpuMetricsScript ];
+          timeout = "5s";
+          data_format = "influx";
+        }]
+        ++ lib.optionals (hasCollector "nix") [{
+          commands = [
+            # Prints one `nix` point in influx line protocol describing the current generation.
+            (pkgs.writeShellScript "telegraf-nix-metrics" ''
+              set -euo pipefail
+
+              # The current generation is the row whose last column is "True".
+              # awk reads every row instead of exiting on the first match: an early exit
+              # can hit the producer with SIGPIPE, which pipefail reports as a failure.
+              current="$(${lib.getExe pkgs.nixos-rebuild} list-generations | ${lib.getExe pkgs.gawk} '$NF == "True" && !seen {print $1 "|" $2 " " $3; seen = 1}')"
+              if [ -z "$current" ]; then
+                echo "telegraf-nix-metrics: no current generation found" >&2
+                exit 1
+              fi
+
+              generation="''${current%%|*}"
+              build_datetime="''${current#*|}"
+              build_timestamp="$(${lib.getExe' pkgs.coreutils "date"} -d "$build_datetime" +%s)"
+              now="$(${lib.getExe' pkgs.coreutils "date"} +%s)"
+              store_bytes="$(${lib.getExe' pkgs.coreutils "du"} -sb /nix/store | ${lib.getExe pkgs.gawk} '{print $1}')"
+
+              # configured_packages is interpolated when the configuration is evaluated, not at run time.
+              printf 'nix generation=%si,configured_packages=%si,store_bytes=%si,build_timestamp=%si,seconds_since_build=%si,build_datetime="%s"\n' \
+                "$generation" \
+                ${toString (builtins.length config.environment.systemPackages)} \
+                "$store_bytes" \
+                "$build_timestamp" \
+                "$((now - build_timestamp))" \
+                "$build_datetime"
+            '')
+          ];
+          interval = "1h";
+          # NOTE(review): du over /nix/store may need longer than this on large stores; confirm.
+          timeout = "30s";
+          data_format = "influx";
+        }];
     })
   ];
   outputsConfig = lib.mkMerge [{
diff --git a/modules/shared/syscfg/monitoring.nix b/modules/shared/syscfg/monitoring.nix
index 759e243..368b1e1 100644
--- a/modules/shared/syscfg/monitoring.nix
+++ b/modules/shared/syscfg/monitoring.nix
@@ -6,6 +6,12 @@ with lib; {
       default = false;
     };
 
+    interval = mkOption {
+      type = types.str;
+      default = "10s";
+      description = "Telegraf agent collection and flush interval, e.g. 10s or 1m.";
+    };
+
     collectors = mkOption {
       type = types.listOf (types.enum [
         "cpu"
@@ -22,6 +28,8 @@ with lib; {
         "mdstat"
         "smart"
         "docker"
+        "systemd_units"
+        "nix"
         "ping"
         "internet_speed"
         "gpu"
diff --git a/systems/sandbox/cfg.nix b/systems/sandbox/cfg.nix
index 85e80d9..2573539 100644
--- a/systems/sandbox/cfg.nix
+++ b/systems/sandbox/cfg.nix
@@ -90,6 +90,7 @@
     };
     monitoring.telegraf = {
       enable = true;
+      interval = "15s";
       collectors = [
         "cpu"
         "mem"
@@ -102,6 +103,8 @@
         #"netstat"
         #"processes"
         "docker"
+        "systemd_units"
+        "nix"
         "ping"
         "internet_speed"
       ];