first commit

2023-06-27 09:16:37 +02:00
commit 3ec5ce616e
77 changed files with 3044 additions and 0 deletions


@@ -0,0 +1 @@
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", SYMLINK+="rfxcom", MODE="0666"


@@ -0,0 +1 @@
SUBSYSTEM=="tty", ATTRS{idVendor}=="0451", ATTRS{idProduct}=="16a8", SYMLINK+="zigbee-serial", MODE="0666"


@@ -0,0 +1,16 @@
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']


@@ -0,0 +1,10 @@
[Unit]
Description=Check for image updates on configured podman containers

[Service]
Type=oneshot
User=root
ExecStart=/root/bin/check_image_updates.sh

[Install]
WantedBy=default.target


@@ -0,0 +1,35 @@
#!/usr/bin/env bash

URL="{{ pillar['podman']['gotify']['url'] }}"
TOKEN="{{ pillar['podman']['gotify']['token'] }}"
TITLE="Updates on $HOSTNAME"
PRIORITY="{{ pillar['podman']['gotify']['priority'] }}"

{% raw -%}
# Names of containers whose image has a newer version available
containers=()

# Pull the container's image and compare the pulled image Id with the Id the
# container is currently running; record the container name if they differ.
function check_update(){
  IFS=',' read -r -a container_info <<< "$(podman container inspect $1 --format '{{ .Name }},{{ .ImageName }},{{ .Image }}')"
  podman pull "${container_info[1]}"
  if [[ "$(podman image inspect "${container_info[1]}" --format "{{.Id}}")" != "${container_info[2]}" ]]; then
    containers[${#containers[@]}]="${container_info[0]}"
  fi
}

IFS=$'\n'
for line in $(podman container ls -q); do
  check_update "$line"
done

if [[ "${#containers[@]}" == "0" ]]; then
  exit
fi

MESSAGE=$(cat << EOM
The following ${#containers[@]} container(s) have updates:
${containers[*]}
EOM
)

curl "$URL/message?token=$TOKEN" -F "title=$TITLE" -F "priority=$PRIORITY" -F "message=$MESSAGE"
echo " "
{% endraw -%}
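
For context, a minimal sketch of the Salt pillar data this notification script expects; the podman:gotify key names (url, token, priority) are taken from the Jinja references above, while the values are illustrative placeholders:

podman:
  gotify:
    url: https://gotify.example.org      # placeholder Gotify base URL
    token: AbCdEfGh12345                 # placeholder application token
    priority: 5                          # message priority passed to Gotify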


@@ -0,0 +1,9 @@
[Unit]
Description=Timer for check_image_updates.service

[Timer]
OnCalendar=Sun, 12:00
Unit=check_image_updates.service

[Install]
WantedBy=timers.target


@@ -0,0 +1,144 @@
#!/usr/bin/env bash

# Pull the image if it is not already present locally.
function pull_image(){
  if ! podman image exists {{ args['image'] }}:{{ args['tag'] }}; then
    podman pull {{ args['image'] }}:{{ args['tag'] }}
  fi
}

# Create the container with all options rendered from the configured data.
function create_container() {
  if ! podman container exists {{ container }}; then
    podman container create \
      --name {{ container }} \
      {%- if args['podman_options'] is defined %}
      {%- for option, value in args['podman_options'].items() %}
      --{{ option }} {{ value }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['volumes'] is defined %}
      {%- for volume, mount in args['volumes'].items() %}
      -v {{ volume }}:{{ mount }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['ports'] is defined %}
      {%- for ports in args['ports'] %}
      -p {{ ports['host'] }}:{{ ports['container'] }}{% if ports['protocol'] is defined %}/{{ ports['protocol'] }}{% endif %} \
      {%- endfor %}
      {%- endif %}
      {%- if args['env'] is defined %}
      {%- for key, value in args['env'].items() %}
      -e {{ key }}={{ value }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['devices'] is defined %}
      {%- for key, value in args['devices'].items() %}
      --device {{ key }}:{{ value }} \
      {%- endfor %}
      {%- endif %}
      {{ args['image'] }}:{{ args['tag'] }}{%- if args['run'] is defined %} \
      {{ args['run'] }}
      {%- endif %}
  fi
}

# Generate a systemd unit for the container and install it under /etc/systemd/system.
function generate_systemd_unit_file() {
  podman generate systemd --name {{ container }} > /etc/systemd/system/{{ container }}.service
}

# Pull the configured image and compare its Id with the image the container currently uses.
function check_update() {
  podman pull {{ args['image'] }}:{{ args['tag'] }}
  if [[ "$(podman image inspect {{ args['image'] }}:{{ args['tag'] }} --format "{% raw %}{{.Id}}{% endraw %}")" == "$(podman inspect {{ container }} --format "{% raw %}{{ .Image }}{% endraw %}")" ]]; then
    echo "No image updates available"
    return 0
  else
    echo "Image update available"
    return 1
  fi
}

# Recreate the container from the latest image and refresh its systemd unit.
function update() {
  systemctl stop {{ container }}
  podman container rm {{ container }}
  create_container
  generate_systemd_unit_file
  systemctl daemon-reload
  systemctl enable --now {{ container }}.service
}

function printHelp(){
  cat << EOF
Usage: ${0##*/} [options...]
  -h, -?, --help          show help and exit
  -p, --pull              pull container image ({{ container }}:{{ args['tag'] }})
  -v, --volumes           create container volumes
  -c, --create            create {{ container }} container
  -s, --start             start and enable {{ container }} container
  -S, --stop              stop {{ container }} container
  -i, --is-running        check whether the container service is running
  -u, --check-update      check if there are image updates available
      --update            perform image update if one exists
  -g, --generate-systemd  generate systemd service unit file
EOF
}

while :; do
  case $1 in
    -h|-\?|--help)
      printHelp
      exit
      ;;
    -p|--pull)
      pull_image
      shift
      ;;
    -v|--volumes)
      create_volumes
      shift
      ;;
    -c|--create)
      create_container
      shift
      ;;
    -s|--start)
      systemctl --user enable --now {{ container }}.service
      shift
      ;;
    -S|--stop)
      systemctl --user stop {{ container }}.service
      shift
      ;;
    -i|--is-running)
      systemctl --user is-active {{ container }}.service
      exit $?
      ;;
    -g|--generate-systemd)
      generate_systemd_unit_file
      shift
      ;;
    -u|--check-update)
      check_update
      shift
      ;;
    --update)
      update
      shift
      ;;
    --) # End of all options
      shift
      break
      ;;
    -?*)
      printf "'%s' is not a valid option\n" "$1" >&2
      exit 1
      ;;
    *) # Break out of case, no more options
      break
  esac
done
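
For reference, a sketch of the per-container data this management template renders from; the args keys (image, tag, podman_options, volumes, ports, env, devices, run) and the container name come from the template above, each key being optional, while the containers:<name> pillar layout and the values are illustrative assumptions:

containers:
  loki:                                   # illustrative container name
    image: docker.io/grafana/loki         # rendered as image:tag
    tag: 2.8.2
    podman_options:
      memory: 512m                        # becomes an extra '--memory 512m' flag
    volumes:
      loki-data: /data                    # named volume -> mount point
    ports:
      - host: 3100
        container: 3100
        protocol: tcp
    env:
      TZ: Europe/Stockholm
    run: -config.file=/etc/loki/local-config.yaml   # optional command appended after the image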


@@ -0,0 +1,3 @@
{% for key, value in env_vars.items() -%}
{{ key }}={{ value }}
{% endfor -%}
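
A small sketch of the mapping this template expects; the variable name env_vars comes from the template, the entries are placeholders. Each pair renders to a KEY=value line suitable for an environment file:

env_vars:
  TZ: Europe/Stockholm
  LOG_LEVEL: info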


@@ -0,0 +1,40 @@
auth_enabled: false

server:
  http_listen_port: 3100

ingester:
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 5m
  chunk_retain_period: 30s
  wal:
    dir: /data/wal

schema_config:
  configs:
    - from: 2020-05-15
      store: boltdb
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 168h

storage_config:
  boltdb:
    directory: /data/loki/index
  filesystem:
    directory: /data/loki/chunks

limits_config:
  enforce_metric_name: false
  reject_old_samples: true
  reject_old_samples_max_age: 168h


@@ -0,0 +1,12 @@
{%- set user = salt['pillar.get']('podman:user', 'root') %}
{%- set home = salt['user.info'](user).home %}
[Unit]
Description=Dump all mariadb databases

[Service]
Type=oneshot
User={{ user }}
ExecStart={{ home }}/bin/mysql-dump.sh

[Install]
WantedBy=default.target
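
The unit resolves its service user from pillar; a minimal sketch of that key, assuming the same podman pillar tree used by the other templates in this commit (the key name comes from the lookup above, the value is the documented default):

podman:
  user: root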


@@ -0,0 +1,15 @@
#!/bin/bash
umask 0077

BACKUP_DIR={{ pillar.containers.mariadb.backup_dir }}
databases=$(podman exec -it mariadb mysql -B -u root -p{{ pillar.containers.mariadb.env.MYSQL_ROOT_PASSWORD }} -e "SHOW DATABASES;" | tr -d "| " | grep -v Database)

for db in ${databases[@]}; do
  db=${db::-1}   # strip the trailing carriage return added by the pseudo-tty
  if [[ "$db" != "information_schema" ]] && [[ "$db" != "performance_schema" ]] && [[ "$db" != "mysql" ]] && [[ "$db" != _* ]] && [[ "$db" != "sys" ]]; then
    echo "Dumping database: $db"
    podman exec -it mariadb mysqldump -u root -p{{ pillar.containers.mariadb.env.MYSQL_ROOT_PASSWORD }} --databases $db | gzip > ${BACKUP_DIR}/$(date +"%Y-%m-%d_%H-%M-%S")_$db-sql.gz
  fi
done

# Delete dump files older than 3 days
find $BACKUP_DIR/* -type f -name "*-sql.gz" -mtime +3 -exec rm {} \;


@@ -0,0 +1,9 @@
[Unit]
Description=Timer for mysql-dump.service

[Timer]
OnCalendar={{ pillar.containers.mariadb.OnCalendar }}
Unit=mysql-dump.service

[Install]
WantedBy=timers.target
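
A sketch of the mariadb pillar values referenced by the dump script and timer above; the key names (backup_dir, env:MYSQL_ROOT_PASSWORD, OnCalendar) come from the templates, the values are placeholders:

containers:
  mariadb:
    backup_dir: /srv/backup/mariadb       # where the gzipped dumps are written
    OnCalendar: '*-*-* 03:00:00'          # systemd calendar expression for the timer
    env:
      MYSQL_ROOT_PASSWORD: changeme       # placeholder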


@@ -0,0 +1 @@
net.ipv4.ip_unprivileged_port_start=80


@@ -0,0 +1,292 @@
groups:
  - name: node_exporter_alerts
    rules:
      - alert: Node down
        expr: up{job="monitoring-pi"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          title: Node {{ $labels.instance }} is down
          description: Failed to scrape {{ $labels.job }} on {{ $labels.instance }} for more than 2 minutes. Node seems down.

      - alert: HostOutOfMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of memory (instance {{ $labels.instance }})
          description: Node memory is filling up (< 10% left)\n VALUE = {{ $value }}

      - alert: HostMemoryUnderMemoryPressure
        expr: rate(node_vmstat_pgmajfault[1m]) > 1000
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host memory under memory pressure (instance {{ $labels.instance }})
          description: The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}

      - alert: HostUnusualNetworkThroughputIn
        expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual network throughput in (instance {{ $labels.instance }})
          description: Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualNetworkThroughputOut
        expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual network throughput out (instance {{ $labels.instance }})
          description: Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskReadRate
        expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk read rate (instance {{ $labels.instance }})
          description: Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskWriteRate
        expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk write rate (instance {{ $labels.instance }})
          description: Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}

      # Please add ignored mountpoints in node_exporter parameters like
      # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
      # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
      - alert: HostOutOfDiskSpace
        expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of disk space (instance {{ $labels.instance }})
          description: Disk is almost full (< 10% left)\n VALUE = {{ $value }}

      # Please add ignored mountpoints in node_exporter parameters like
      # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
      # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
      - alert: HostDiskWillFillIn24Hours
        expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host disk will fill in 24 hours (instance {{ $labels.instance }})
          description: Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}

      - alert: HostOutOfInodes
        expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of inodes (instance {{ $labels.instance }})
          description: Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}

      - alert: HostInodesWillFillIn24Hours
        expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host inodes will fill in 24 hours (instance {{ $labels.instance }})
          description: Filesystem is predicted to run out of inodes within the next 24 hours at current write rate\n VALUE = {{ $value }}

      - alert: HostUnusualDiskReadLatency
        expr: rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk read latency (instance {{ $labels.instance }})
          description: Disk latency is growing (read operations > 100ms)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskWriteLatency
        expr: rate(node_disk_write_time_seconds_total{device!~"mmcblk.+"}[1m]) / rate(node_disk_writes_completed_total{device!~"mmcblk.+"}[1m]) > 0.1 and rate(node_disk_writes_completed_total{device!~"mmcblk.+"}[1m]) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk write latency (instance {{ $labels.instance }})
          description: Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}

      - alert: HostHighCpuLoad
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host high CPU load (instance {{ $labels.instance }})
          description: CPU load is > 80%\n VALUE = {{ $value }}

      - alert: HostCpuStealNoisyNeighbor
        expr: avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100 > 10
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }})
          description: CPU steal is > 10%. A noisy neighbor is killing VM performance or a spot instance may be out of credit.\n VALUE = {{ $value }}

      # 1000 context switches is an arbitrary number.
      # Alert threshold depends on nature of application.
      # Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
      - alert: HostContextSwitching
        expr: (rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host context switching (instance {{ $labels.instance }})
          description: Context switching is growing on node (> 1000 / s)\n VALUE = {{ $value }}

      - alert: HostSwapIsFillingUp
        expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host swap is filling up (instance {{ $labels.instance }})
          description: Swap is filling up (>80%)\n VALUE = {{ $value }}

      - alert: HostSystemdServiceCrashed
        expr: node_systemd_unit_state{state="failed"} == 1
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host SystemD service crashed (instance {{ $labels.instance }})
          description: SystemD service crashed\n VALUE = {{ $value }}

      - alert: HostPhysicalComponentTooHot
        expr: node_hwmon_temp_celsius > 75
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host physical component too hot (instance {{ $labels.instance }})
          description: Physical hardware component too hot\n VALUE = {{ $value }}

      - alert: HostNodeOvertemperatureAlarm
        expr: node_hwmon_temp_crit_alarm_celsius == 1
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Host node overtemperature alarm (instance {{ $labels.instance }})
          description: Physical node temperature alarm triggered\n VALUE = {{ $value }}

      - alert: HostRaidArrayGotInactive
        expr: node_md_state{state="inactive"} > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Host RAID array got inactive (instance {{ $labels.instance }})
          description: RAID array {{ $labels.device }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.\n VALUE = {{ $value }}

      - alert: HostRaidDiskFailure
        expr: node_md_disks{state="failed"} > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host RAID disk failure (instance {{ $labels.instance }})
          description: At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n VALUE = {{ $value }}

      - alert: HostOomKillDetected
        expr: increase(node_vmstat_oom_kill[1m]) > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host OOM kill detected (instance {{ $labels.instance }})
          description: OOM kill detected\n VALUE = {{ $value }}

      - alert: HostEdacCorrectableErrorsDetected
        expr: increase(node_edac_correctable_errors_total[1m]) > 0
        for: 0m
        labels:
          severity: info
        annotations:
          summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }})
          description: Instance has had {{ printf "%.0f" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}

      - alert: HostEdacUncorrectableErrorsDetected
        expr: node_edac_uncorrectable_errors_total > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})
          description: Instance has had {{ printf "%.0f" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkReceiveErrors
        expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host Network Receive Errors (instance {{ $labels.instance }}:{{ $labels.device }})
          description: Instance interface has encountered {{ printf "%.0f" $value }} receive errors in the last five minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkTransmitErrors
        expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host Network Transmit Errors (instance {{ $labels.instance }}:{{ $labels.device }})
          description: Instance has encountered {{ printf "%.0f" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkInterfaceSaturated
        expr: (rate(node_network_receive_bytes_total{device!~"^tap.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^tap.*"}[1m])) / node_network_speed_bytes{device!~"^tap.*"} > 0.8
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Host Network Interface Saturated (instance {{ $labels.instance }}:{{ $labels.interface }})
          description: The network interface is getting overloaded.\n VALUE = {{ $value }}

      - alert: HostConntrackLimit
        expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host conntrack limit (instance {{ $labels.instance }})
          description: The number of conntrack entries is approaching the limit\n VALUE = {{ $value }}

      - alert: HostClockSkew
        expr: (node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host clock skew (instance {{ $labels.instance }})
          description: Clock skew detected. Clock is out of sync.\n VALUE = {{ $value }}

      - alert: HostClockNotSynchronising
        expr: min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host clock not synchronising (instance {{ $labels.instance }})
          description: Clock not synchronising.\n VALUE = {{ $value }}


@@ -0,0 +1,59 @@
# my global config #
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.2.0.22:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "alert.node.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node"
    static_configs:
      - targets:
          - "poblano.rre.nu:9100"
          - "salt.rre.nu:9100"
          - "pepper.rre.nu:9100"
          - "woody.rre.nu:9100"
          - "serrano.rre.nu:9100"
          - "coronado.rre.nu:9100"

  - job_name: "unpoller"
    static_configs:
      - targets:
          - "unpoller.rre.nu:9130"

  - job_name: "fail2ban"
    static_configs:
      - targets:
          - "poblano.rre.nu:9191"
          - "salt.rre.nu:9191"
          - "pepper.rre.nu:9191"

  - job_name: "nginx"
    static_configs:
      - targets:
          - "10.2.0.22:9193"

  - job_name: "promtail"
    static_configs:
      - targets:
          - "serrano.rre.nu:9080"
          - "coronado.rre.nu:9080"


@@ -0,0 +1,29 @@
server:
  http_listen_port: {{ http_listen_port }}
  grpc_listen_port: 0

positions:
  filename: /tmp/positions.yaml

clients:
  - url: "{{ client_url }}"

scrape_configs:
  - job_name: journal
    journal:
      max_age: 12h
      path: /var/log/journal
      labels:
        job: systemd-journal
    relabel_configs:
      - source_labels: ['__journal__systemd_unit']
        target_label: 'unit'
      - source_labels: ["__journal__hostname"]
        target_label: host
      - source_labels: ["__journal_priority_keyword"]
        target_label: level
      - source_labels: ["__journal_syslog_identifier"]
        target_label: syslog_identifier
      - source_labels: ["__journal_container_name"]
        target_label: container_name
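
The promtail template above takes two variables; a sketch of plausible values, assuming they are handed in as template context or pillar. The names come from the template, the port matches the promtail targets scraped in prometheus.yml above, and the Loki push URL is an illustrative placeholder:

http_listen_port: 9080
client_url: http://loki.example.org:3100/loki/api/v1/push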


@@ -0,0 +1,234 @@
# Unpoller v2 primary configuration file. TOML FORMAT #
###########################################################
[poller]
# Turns on line numbers, microsecond logging, and a per-device log.
# The default is false, but I personally leave this on at home (four devices).
# This may be noisy if you have a lot of devices. It adds one line per device.
debug = false
# Turns off per-interval logs. Only startup and error logs will be emitted.
# Recommend enabling debug with this setting for better error logging.
quiet = false
# Load dynamic plugins. Advanced use; only sample mysql plugin provided by default.
plugins = []
#### OUTPUTS
# If you don't use an output, you can disable it.
[prometheus]
disable = false
# This controls on which ip and port /metrics is exported when mode is "prometheus".
# This has no effect in other modes. Must contain a colon and port.
http_listen = "0.0.0.0:9130"
# Adding an SSL Cert and Cert Key will make Poller listen with SSL/https.
ssl_cert_path = ""
ssl_key_path = ""
# Errors are rare. Setting this to true will report them to Prometheus.
report_errors = false
## Record data for disabled or down (unlinked) switch ports.
dead_ports = false
[influxdb]
disable = true
# InfluxDB does not require auth by default, so the user/password are probably unimportant.
url = "http://127.0.0.1:8086"
user = "unifipoller"
# Password for InfluxDB user (above).
# If the password provided here begins with file:// then the password is read in from
# the file path that follows the file:// prefix. ex: file:///etc/influxdb/passwd.file
pass = "unifipoller"
# Be sure to create this database. See the InfluxDB Wiki page for more info.
db = "unifi"
# If your InfluxDB uses a valid SSL cert, set this to true.
verify_ssl = false
# The UniFi Controller only updates traffic stats about every 30 seconds.
# Setting this to something lower may lead to "zeros" in your data.
# If you're getting zeros now, set this to "1m"
interval = "30s"
## Record data for disabled or down (unlinked) switch ports.
dead_ports = false
# To enable output of UniFi Events to Loki, add a URL; it's disabled otherwise.
# User, pass and tenant_id are optional and most folks won't set them.
# Pick which logs you want per-controller in the [unifi.controller] section.
# This is a new feature. Feedback welcome!
[loki]
disable = false
url = "{{ pillar['containers']['unpoller']['loki_url'] }}"
# The rest of this is advanced & optional. See wiki.
user = ""
pass = ""
verify_ssl = false
tenant_id = ""
interval = "2m"
timeout = "10s"
[datadog]
# How often to poll UniFi and report to Datadog.
interval = "2m"
# To enable this output plugin
enable = false
# Datadog Custom Options
# address to talk to the datadog agent, by default this uses the local statsd UDP interface
# address = "localhost:8125"
# namespace to prepend to all data, default is no additional prefix.
# namespace = ""
# tags to append to all data
# tags = [ "customer:abc_corp" ]
# For more advanced options for very large amount of data collected see the upstream
# github.com/unpoller/unpoller/pkg/datadogunifi repository README.
# Unpoller has an optional web server. To turn it on, set enable to true. If you
# wish to use SSL, provide SSL cert and key paths. This interface is currently
# read-only; it just displays information, like logs, devices and clients.
# Notice: Enabling the web server with many sites will increase memory usage.
# This is a new feature and lacks a UI, enabling only recommended for testing.
[webserver]
enable = false
port = 37288
# The HTML path is different on Windows and BSD/macOS.
html_path = "/usr/lib/unifi-poller/web"
ssl_cert_path = ""
ssl_key_path = ""
# How many events per event group to hold. 200-2000. Use fewer with many sites.
# With 1 site, you'll have a max total of 9 event groups; 1 per plugin, 4 per site.
# Each site adds 1 event group for each of these inputs that is enabled:
# save_ids, save_events, save_anomalies, save_alarms.
max_events = 200
# By default the web interface does not require authentication. You can change
# that by adding a username and password hash (or multiple) below.
# To create a hash, run unifi-poller with the -e CLI argument. See Wiki for more!
[webserver.accounts]
# username = "password-hash"
# captain = "$2a$04$mxw6i0LKH6u46oaLK2cq5eCTAAFkfNiRpzNbz.EyvJZZWNa2FzIlS"
#### INPUTS
[unifi]
# Setting this to true and providing default credentials allows you to skip
# configuring controllers in this config file. Instead you configure them in
# your prometheus.yml config. Prometheus then sends the controller URL to
# Unpoller when it performs the scrape. This is useful if you have many,
# or changing controllers. See wiki for more.
dynamic = false
# The following section contains the default credentials/configuration for any
# dynamic controller (see above section), or the primary controller if you do not
# provide one and dynamic is disabled. In other words, you can just add your
# controller here and delete the following section. The internal defaults are
# shown below. Any missing values will assume these displayed defaults.
[unifi.defaults]
# URL for the UniFi Controller. Do not add any paths after the host:port.
# Do not use port 8443 if you have a UDM; just use "https://ip".
url = "{{ pillar['containers']['unpoller']['unifi_url'] }}"
# Make a read-only user in the UniFi Admin Settings, allow it access to all sites.
user = "{{ pillar['containers']['unpoller']['unifi_user'] }}"
# Password for UniFi controller user (above).
# If the password provided here begins with file:// then the password is read in from
# the file path that follows the file:// prefix. ex: file:///etc/unifi/password.file
# ex: file:///etc/unifi/passwd.file, windows: file://C:\\UserData\\Unifi\\Passwd.txt
pass = "{{ pillar['containers']['unpoller']['unifi_pass'] }}"
# If the controller has more than one site, specify which sites to poll here.
# Set this to ["default"] to poll only the first site on the controller.
# A setting of ["all"] will poll all sites; this works if you only have 1 site too.
sites = ["all"]
# Specify a timeout, leave missing to declare infinite wait. This determines the maximum
# time to wait for a response from the unifi controller on any API request.
# timeout = 60s
# Enable collection of site data. This data powers the Network Sites dashboard.
# It's not valuable to everyone and setting this to false will save resources.
save_sites = true
# Hash, with md5, client names and MAC addresses. This attempts to protect
# personally identifiable information. Most users won't want to enable this.
hash_pii = false
# Enable collection of Intrusion Detection System Data (InfluxDB/Loki only).
# Only useful if IDS or IPS are enabled on one of the sites. This may store
# a lot of information. Only recommended for testing and debugging. There
# may not be any dashboards to display this data. It can be used for annotations.
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_ids = false
# Enable collection of UniFi Events (InfluxDB/Loki only).
# This may store a lot of information. Only recommended for testing and debugging.
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_events = true
# Enable collection of UniFi Alarms (InfluxDB/Loki only).
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_alarms = true
# Enable collection of UniFi Anomalies (InfluxDB/Loki only).
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki.
save_anomalies = true
# Enable collection of Deep Packet Inspection data. This data breaks down traffic
# types for each client and site, it powers a dedicated DPI dashboard.
# Enabling this adds roughly 150 data points per client. That's 6000 metrics for
# 40 clients. This adds a little bit of poller run time per interval and causes
# more API requests to your controller(s). Don't let these "cons" sway you:
# it's cool data. Please provide feedback on your experience with this feature.
save_dpi = false
## Enabling save_rogue stores even more data in your time series databases.
## This saves neighboring access point metrics in a dedicated table or namespace.
save_rogue = false
# If your UniFi controller has a valid SSL certificate (like lets encrypt),
# you can enable this option to validate it. Otherwise, any SSL certificate is
# valid. If you don't know if you have a valid SSL cert, then you don't have one.
verify_ssl = false
## You may provide a list of SSL cert files (PEM format) that you expect your
## controller to use. As long as one of the certs you provide here shows up in
## the cert trust chain the controller presents it will be accepted and allowed.
## These files may be re-read while poller is running.
## Example: ssl_cert_paths = ["/path/to/cert.pem", "/another/cert.pem"]
ssl_cert_paths = []
# The following is optional and used for configurations with multiple UniFi controllers.
# You may repeat the following [[unifi.controller]] section as many times as needed to
# poll multiple controllers. Uncomment the entire section including [[unifi.controller]].
# Omitted variables will have their values taken from the defaults, above.
#
#[[unifi.controller]]
# url = "https://127.0.0.1:8443"
# user = "unifipoller"
# pass = "unifipoller"
# sites = ["all"]
# save_sites = true
# hash_pii = false
# save_ids = false
# save_events = false
# save_alarms = false
# save_anomalies = false
# save_dpi = false
# save_rogue = false
# verify_ssl = false
# ssl_cert_paths = []
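
Finally, a sketch of the unpoller pillar keys referenced by this template; the key names (loki_url, unifi_url, unifi_user, unifi_pass) come from the Jinja lookups above, the values are placeholders:

containers:
  unpoller:
    loki_url: http://loki.example.org:3100    # Loki endpoint UniFi events are shipped to
    unifi_url: https://unifi.example.org      # UniFi controller URL, no trailing path
    unifi_user: unifipoller                   # read-only controller user
    unifi_pass: changeme                      # placeholder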