260 lines
6.7 KiB
Nix
260 lines
6.7 KiB
Nix
{ config, lib, pkgs, ... }:
|
|
let
|
|
# Shorthand for this module's option values; `enable`, `collectors`, `outputs`
# and `extraConfig` are the attributes read from it in this file.
cfg = config.syscfg.monitoring.telegraf;
|
|
# True when `collector` is listed in `cfg.collectors`.
hasCollector = collector: lib.elem collector cfg.collectors;
|
|
# Supplementary groups the telegraf user/service needs for the "docker"
# collector: one group per container runtime that is enabled on this host.
# Evaluates to [ ] when the module or the collector is disabled.
dockerGroups =
  let
    wantsDocker = cfg.enable && hasCollector "docker";
    # `runtimeEnabled` is only forced when `wantsDocker` holds (lazy argument).
    groupFor = runtimeEnabled: group: lib.optional (wantsDocker && runtimeEnabled) group;
  in
  groupFor config.virtualisation.podman.enable "podman"
  ++ groupFor config.virtualisation.docker.enable "docker";
|
|
# Shell script that runs one `amdgpu_top -J -n 1` sample through jq and prints
# one line per entry of `.devices`, shaped as
#   amdgpu,card=<key> <field>=<value>[,<field>=<value>...]
# Integer fields are floored and get an `i` suffix; fields whose source value
# is null are omitted, and a device with no fields at all prints nothing.
# `utilization_gpu` falls back to the GRBM2 "Command Processor - Graphics"
# value and finally to 0, so it is always present.
amdgpuMetricsScript = pkgs.writeShellScript "telegraf-amdgpu-metrics" ''
  set -euo pipefail

  ${lib.getExe pkgs.custom.amdgpu_top} -J -n 1 | ${lib.getExe pkgs.jq} -r '
    def maybe_int($name; $value):
      if $value == null then empty else "\($name)=\(($value | floor))i" end;

    def maybe_float($name; $value):
      if $value == null then empty else "\($name)=\($value)" end;

    .devices
    | to_entries[]
    | [
        maybe_int("utilization_gpu"; (.value.gpu_activity.GFX.value // .value.GRBM2["Command Processor - Graphics"].value // 0)),
        maybe_int("utilization_media"; .value.gpu_activity.MediaEngine.value),
        maybe_int("utilization_memory"; .value.gpu_activity.Memory.value),
        maybe_float("temperature_edge"; .value.Sensors["Edge Temperature"].value),
        maybe_float("power_draw"; .value.gpu_metrics.average_socket_power.value)
      ] as $fields
    # Filter the collected fields explicitly: after the binding above the
    # pipeline input is still the device entry, not the field list.
    | ($fields | map(select(length > 0))) as $nonempty
    | select(($nonempty | length) > 0)
    | "amdgpu,card=\(.key) " + ($nonempty | join(","))
  '
'';
|
|
# Agent-wide Telegraf settings and global tags. The host name from
# `config.syscfg.hostname` is used both as the agent hostname and as the
# `host` global tag.
baseConfig =
  let
    inherit (config.syscfg) hostname;
  in
  {
    agent = {
      inherit hostname;
      omit_hostname = false;

      # Collection and flush cadence.
      interval = "10s";
      flush_interval = "10s";
      round_interval = true;

      # Batching / buffering limits.
      metric_batch_size = 1000;
      metric_buffer_limit = 10000;
    };

    global_tags.host = hostname;
  };
|
|
# Telegraf input plugin settings, one entry per supported collector name.
# Each entry is only merged into the final configuration when its collector
# is listed in `cfg.collectors`.
inputsConfig =
  let
    # Settings for `inputs.<name>`, active only when collector `name` is enabled.
    input = name: settings: lib.mkIf (hasCollector name) { inputs.${name} = settings; };
  in
  lib.mkMerge [
    (input "cpu" {
      percpu = true;
      totalcpu = true;
      collect_cpu_time = false;
      report_active = false;
      fielddrop = [
        "usage_guest"
        "usage_guest_nice"
        "usage_irq"
        "usage_nice"
        "usage_softirq"
        "usage_steal"
      ];
    })

    (input "mem" {
      fielddrop = [
        "available_percent"
        "commit_limit"
        "committed_as"
        "high_free"
        "high_total"
        "huge_page_size"
        "huge_pages_free"
        "huge_pages_total"
        "low_free"
        "low_total"
        "mapped"
        "page_tables"
        "slab"
        "sreclaimable"
        "sunreclaim"
        "swap_cached"
        "swap_free"
        "swap_total"
        "vmalloc_chunk"
        "vmalloc_total"
        "vmalloc_used"
        "write_back"
        "write_back_tmp"
      ];
    })

    (input "swap" {
      fielddrop = [ "free" ];
    })

    (input "system" {
      fielddrop = [ "n_physical_cpus" "n_unique_users" "uptime_format" ];
    })

    (input "disk" {
      ignore_fs = [ "tmpfs" "devtmpfs" "devfs" "overlay" "squashfs" ];
      fielddrop = [
        "free"
        "inodes_free"
        "inodes_total"
        "inodes_used"
        "inodes_used_percent"
      ];
    })

    (input "diskio" {
      skip_serial_number = true;
      fielddrop = [
        "io_svctm"
        "iops_in_progress"
        "merged_reads"
        "merged_writes"
        "weighted_io_time"
      ];
    })

    (input "kernel" {
      fielddrop = [ "boot_time" ];
    })

    (input "net" {
      fielddrop = [ "bytes_recv" "bytes_sent" "speed" ];
    })

    (input "netstat" {
      fielddrop = [
        "tcp_close"
        "tcp_close_wait"
        "tcp_closing"
        "tcp_fin_wait1"
        "tcp_fin_wait2"
        "tcp_last_ack"
        "tcp_none"
        "tcp_syn_recv"
        "tcp_syn_sent"
      ];
    })

    (input "processes" {
      fielddrop = [
        "dead"
        "idle"
        "paging"
        "stopped"
        "unknown"
        "zombies"
      ];
    })

    # Plugins enabled with an empty settings table.
    (input "temp" { })
    (input "mdstat" { })

    (input "smart" {
      use_sudo = true;
      attributes = true;
    })

    (input "docker" {
      endpoint = "unix:///var/run/docker.sock";
      timeout = "5s";
      perdevice_include = [ ];
      total_include = [ ];
      fielddrop = [
        "memory_total"
        "n_cpus"
        "n_goroutines"
        "n_listener_events"
        "n_used_file_descriptors"
        "server_version"
      ];
    })

    (input "ping" {
      urls = [ "1.1.1.1" ];
      count = 4;
      interval = "60s";
      timeout = 5.0;
      binary = "${pkgs.iputils}/bin/ping";
      fielddrop = [ "packets_received" "packets_transmitted" ];
    })

    # The "gpu" collector is not a plugin of the same name: it runs the
    # amdgpu metrics script through `inputs.exec` and parses its output as
    # influx-formatted data.
    (lib.mkIf (hasCollector "gpu") {
      inputs.exec = [
        {
          commands = [ amdgpuMetricsScript ];
          timeout = "5s";
          data_format = "influx";
        }
      ];
    })
  ];
|
|
# Output section: everything is written to the `telegraf` database of the
# InfluxDB endpoints listed in `cfg.outputs`.
outputsConfig = lib.mkMerge [
  {
    outputs = {
      influxdb_v3 = {
        urls = cfg.outputs;
        database = "telegraf";
        # Literal placeholder, not the secret itself. Presumably expanded by
        # Telegraf from the environment file configured on the service
        # (TODO confirm); the sops secret path is deliberately not used here.
        token = "$INFLUX_TOKEN";
      };
    };
  }
];
|
|
in {
  # All settings below apply only when `cfg.enable` is set.
  config = lib.mkIf cfg.enable {
    # Telegraf itself: the environment file comes from the sops secret, and
    # the final configuration is the merge of the base, input and output
    # fragments plus the user-supplied `cfg.extraConfig`.
    services.telegraf = {
      enable = true;
      environmentFiles = [ config.sops.secrets.telegraf.path ];
      extraConfig = lib.mkMerge [
        baseConfig
        inputsConfig
        outputsConfig
        cfg.extraConfig
      ];
    };

    # Container runtime groups, applied to both the user and the unit.
    users.users.telegraf.extraGroups = dockerGroups;

    systemd.services.telegraf = {
      path =
        # The smart input is configured with `use_sudo = true`, so besides the
        # tools themselves the unit needs the setuid wrapper directory on its
        # PATH; "/run/wrappers" expands to /run/wrappers/bin, where NixOS
        # installs `sudo`.
        lib.optionals (hasCollector "smart") [
          pkgs.smartmontools
          pkgs.nvme-cli
          "/run/wrappers"
        ]
        ++ lib.optionals (hasCollector "gpu") [
          pkgs.custom.amdgpu_top
          pkgs.jq
        ];
      serviceConfig.SupplementaryGroups = dockerGroups;
    };

    # Let the telegraf user run smartctl through sudo without a password;
    # only added when the "smart" collector is enabled.
    security.sudo.extraRules = lib.optionals (hasCollector "smart") [
      {
        users = [ "telegraf" ];
        commands = [
          {
            command = "${pkgs.smartmontools}/bin/smartctl";
            options = [ "NOPASSWD" ];
          }
        ];
      }
    ];
  };
}
|