# Telegraf monitoring module.
#
# Reads its settings from `config.syscfg.monitoring.telegraf`:
#   enable      - turns the whole module on.
#   collectors  - list of collector names; each name present enables the
#                 matching telegraf input defined below.
#   outputs     - list of URLs passed to the influxdb_v3 output.
#   extraConfig - extra telegraf settings merged on top of everything here.
{ config, lib, pkgs, ... }:
let
  cfg = config.syscfg.monitoring.telegraf;

  # True when `name` appears in cfg.collectors.
  hasCollector = name: builtins.elem name cfg.collectors;

  # Groups the telegraf user/service must join for the docker collector:
  # one entry per container engine that is enabled on this host.
  dockerGroups =
    lib.optionals (cfg.enable && hasCollector "docker" && config.virtualisation.podman.enable) [ "podman" ]
    ++ lib.optionals (cfg.enable && hasCollector "docker" && config.virtualisation.docker.enable) [ "docker" ];

  # Runs amdgpu_top once in JSON mode and converts each device into one
  # influx line-protocol record: `amdgpu,card=<key> name=value,...`.
  #
  # maybe_int / maybe_float emit nothing for null readings, so a device
  # only reports the fields it actually has; a device with no fields at
  # all is skipped. Note that `as $fields |` leaves `.` pointing at the
  # to_entries entry, so the field list has to be piped in explicitly
  # when filtering it (it must not be filtered via a bare `map`).
  amdgpuMetricsScript = pkgs.writeShellScript "telegraf-amdgpu-metrics" ''
    set -euo pipefail

    ${lib.getExe pkgs.custom.amdgpu_top} -J -n 1 | ${lib.getExe pkgs.jq} -r '
      def maybe_int($name; $value):
        if $value == null then empty else "\($name)=\(($value | floor))i" end;
      def maybe_float($name; $value):
        if $value == null then empty else "\($name)=\($value)" end;

      .devices
      | to_entries[]
      | [
          maybe_int("utilization_gpu"; (.value.gpu_activity.GFX.value // .value.GRBM2["Command Processor - Graphics"].value // 0)),
          maybe_int("utilization_media"; .value.gpu_activity.MediaEngine.value),
          maybe_int("utilization_memory"; .value.gpu_activity.Memory.value),
          maybe_float("temperature_edge"; .value.Sensors["Edge Temperature"].value),
          maybe_float("power_draw"; .value.gpu_metrics.average_socket_power.value)
        ] as $fields
      | ($fields | map(select(length > 0))) as $nonempty
      | select(($nonempty | length) > 0)
      | "amdgpu,card=\(.key) " + ($nonempty | join(","))
    '
  '';

  # Agent-wide settings and tags applied to every metric.
  baseConfig = {
    agent = {
      interval = "10s";
      round_interval = true;
      metric_batch_size = 1000;
      metric_buffer_limit = 10000;
      flush_interval = "10s";
      hostname = config.syscfg.hostname;
      omit_hostname = false;
    };
    global_tags = {
      host = config.syscfg.hostname;
    };
  };

  # Settings shared by both docker input instances below.
  dockerInputCommon = {
    endpoint = "unix:///var/run/docker.sock";
    timeout = "5s";
    perdevice_include = [ ];
    total_include = [ ];
    docker_label_exclude = [ "*" ];
    tagexclude = [
      "container_image"
      "container_status"
      "container_version"
      "engine_host"
      "server_version"
    ];
  };

  # One conditional fragment per collector; only the fragments whose
  # collector name is listed in cfg.collectors end up in the config.
  inputsConfig = lib.mkMerge [
    (lib.mkIf (hasCollector "cpu") {
      inputs.cpu = {
        percpu = true;
        totalcpu = true;
        collect_cpu_time = false;
        report_active = false;
        fielddrop = [
          "usage_guest"
          "usage_guest_nice"
          "usage_irq"
          "usage_nice"
          "usage_softirq"
          "usage_steal"
        ];
      };
    })

    (lib.mkIf (hasCollector "mem") {
      inputs.mem = {
        fielddrop = [
          "available_percent"
          "commit_limit"
          "committed_as"
          "high_free"
          "high_total"
          "huge_page_size"
          "huge_pages_free"
          "huge_pages_total"
          "low_free"
          "low_total"
          "mapped"
          "page_tables"
          "slab"
          "sreclaimable"
          "sunreclaim"
          "swap_cached"
          "swap_free"
          "swap_total"
          "vmalloc_chunk"
          "vmalloc_total"
          "vmalloc_used"
          "write_back"
          "write_back_tmp"
        ];
      };
    })

    (lib.mkIf (hasCollector "swap") {
      inputs.swap = {
        fielddrop = [ "free" ];
      };
    })

    (lib.mkIf (hasCollector "system") {
      inputs.system = {
        fielddrop = [
          "n_physical_cpus"
          "n_unique_users"
          "uptime_format"
        ];
      };
    })

    (lib.mkIf (hasCollector "disk") {
      inputs.disk = {
        ignore_fs = [
          "tmpfs"
          "devtmpfs"
          "devfs"
          "overlay"
          "squashfs"
        ];
        fielddrop = [
          "free"
          "inodes_free"
          "inodes_total"
          "inodes_used"
          "inodes_used_percent"
        ];
      };
    })

    (lib.mkIf (hasCollector "diskio") {
      inputs.diskio = {
        skip_serial_number = true;
        fielddrop = [
          "io_svctm"
          "iops_in_progress"
          "merged_reads"
          "merged_writes"
          "weighted_io_time"
        ];
      };
    })

    (lib.mkIf (hasCollector "kernel") {
      inputs.kernel = {
        fielddrop = [ "boot_time" ];
      };
    })

    (lib.mkIf (hasCollector "net") {
      inputs.net = {
        fielddrop = [
          "bytes_recv"
          "bytes_sent"
          "speed"
        ];
      };
    })

    (lib.mkIf (hasCollector "netstat") {
      inputs.netstat = {
        fielddrop = [
          "tcp_close"
          "tcp_close_wait"
          "tcp_closing"
          "tcp_fin_wait1"
          "tcp_fin_wait2"
          "tcp_last_ack"
          "tcp_none"
          "tcp_syn_recv"
          "tcp_syn_sent"
        ];
      };
    })

    (lib.mkIf (hasCollector "processes") {
      inputs.processes = {
        fielddrop = [
          "dead"
          "idle"
          "paging"
          "stopped"
          "unknown"
          "zombies"
        ];
      };
    })

    (lib.mkIf (hasCollector "temp") {
      inputs.temp = { };
    })

    (lib.mkIf (hasCollector "mdstat") {
      inputs.mdstat = { };
    })

    (lib.mkIf (hasCollector "smart") {
      inputs.smart = {
        # Paired with the sudoers rule and the wrapper PATH entry at the
        # bottom of this module.
        use_sudo = true;
        attributes = true;
      };
    })

    (lib.mkIf (hasCollector "docker") {
      inputs.docker = [
        # Instance 1: everything except the health/mem/status measurements.
        (dockerInputCommon // {
          namedrop = [
            "docker_container_health"
            "docker_container_mem"
            "docker_container_status"
          ];
          fielddrop = [
            "memory_total"
            "n_cpus"
            "n_goroutines"
            "n_listener_events"
            "n_used_file_descriptors"
            "server_version"
          ];
        })

        # Instance 2: only docker_container_mem, with its own field filter.
        (dockerInputCommon // {
          namepass = [ "docker_container_mem" ];
          fielddrop = [
            "active_anon"
            "active_file"
            "container_id"
            "hierarchical_memory_limit"
            "inactive_anon"
            "inactive_file"
            "mapped_file"
            "max_usage"
            "pgfault"
            "pgmajfault"
            "pgpgin"
            "pgpgout"
            "rss_huge"
            "total_active_anon"
            "total_active_file"
            "total_cache"
            "total_inactive_anon"
            "total_inactive_file"
            "total_mapped_file"
            "total_pgfault"
            "total_pgmajfault"
            "total_pgpgin"
            "total_pgpgout"
            "total_rss"
            "total_rss_huge"
            "total_unevictable"
            "total_writeback"
            "unevictable"
            "writeback"
          ];
        })
      ];
    })

    (lib.mkIf (hasCollector "ping") {
      inputs.ping = {
        urls = [ "1.1.1.1" ];
        count = 4;
        interval = "60s";
        timeout = 5.0;
        binary = "${pkgs.iputils}/bin/ping";
        fielddrop = [
          "packets_received"
          "packets_transmitted"
        ];
      };
    })

    (lib.mkIf (hasCollector "internet_speed") {
      inputs.internet_speed = {
        interval = "30m";
        cache = true;
        memory_saving_mode = true;
      };
    })

    # The "gpu" collector is implemented through the generic exec input
    # running the amdgpu script defined above.
    (lib.mkIf (hasCollector "gpu") {
      inputs.exec = [
        {
          commands = [ amdgpuMetricsScript ];
          timeout = "5s";
          data_format = "influx";
        }
      ];
    })
  ];

  outputsConfig = {
    outputs.influxdb_v3 = {
      urls = cfg.outputs;
      # Left as a placeholder in the generated config; the value is expected
      # to be supplied through services.telegraf.environmentFiles below.
      token = "$INFLUX_TOKEN";
      database = "telegraf";
    };
  };
in
{
  config = lib.mkIf cfg.enable {
    services.telegraf = {
      enable = true;
      environmentFiles = [ config.sops.secrets.telegraf.path ];
      extraConfig = lib.mkMerge [
        baseConfig
        inputsConfig
        outputsConfig
        cfg.extraConfig
      ];
    };

    users.users.telegraf.extraGroups = dockerGroups;

    systemd.services.telegraf = {
      path =
        lib.optionals (hasCollector "smart") [
          pkgs.smartmontools
          pkgs.nvme-cli
          # With use_sudo the smart input invokes `sudo` by name. The setuid
          # sudo wrapper lives in /run/wrappers/bin, which is not on a
          # unit's PATH unless added here (the path option appends /bin).
          "/run/wrappers"
        ]
        ++ lib.optionals (hasCollector "gpu") [
          pkgs.custom.amdgpu_top
          pkgs.jq
        ];
      serviceConfig.SupplementaryGroups = dockerGroups;
    };

    # Lets the telegraf user run smartctl through sudo without a password,
    # as required by `use_sudo = true` on the smart input.
    security.sudo.extraRules = lib.optionals (hasCollector "smart") [
      {
        users = [ "telegraf" ];
        commands = [
          {
            command = "${pkgs.smartmontools}/bin/smartctl";
            options = [ "NOPASSWD" ];
          }
        ];
      }
    ];
  };
}