first commit
containers/alertmanager.sls (26 lines, Normal file)
@@ -0,0 +1,26 @@
{% from 'lib.sls' import container_deploy with context %}

Create alertmanager config directory:
  file.recurse:
    - name: /etc/alertmanager
    - source: salt://containers/files/alertmanager
    - user: nobody
    - group: root
    - dir_mode: "0755"
    - file_mode: "0644"

Create alertmanager data directory:
  file.directory:
    - name: /srv/alertmanager
    - user: nobody
    - group: nobody
    - mode: "0755"

{{ container_deploy('alertmanager') }}

Start alertmanager service:
  service.running:
    - name: alertmanager
    - enable: True
    - watch:
      - file: Create alertmanager config directory
containers/fail2ban_exporter.sls (3 lines, Normal file)
@@ -0,0 +1,3 @@
{% from 'lib.sls' import container_deploy with context %}

{{ container_deploy('fail2ban_exporter') }}
containers/files/99-rfxcom-serial.rules (1 line, Normal file)
@@ -0,0 +1 @@
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", SYMLINK+="rfxcom", MODE="0666"
containers/files/99-zigbee-serial.rules (1 line, Normal file)
@@ -0,0 +1 @@
SUBSYSTEM=="tty", ATTRS{idVendor}=="0451", ATTRS{idProduct}=="16a8", SYMLINK+="zigbee-serial", MODE="0666"
containers/files/alertmanager/alertmanager.yml (16 lines, Normal file)
@@ -0,0 +1,16 @@
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
containers/files/check_image_updates.service.jinja (10 lines, Normal file)
@@ -0,0 +1,10 @@
[Unit]
Description=Check for image updates on configured podman containers

[Service]
Type=oneshot
User=root
ExecStart=/root/bin/check_image_updates.sh

[Install]
WantedBy=default.target
containers/files/check_image_updates.sh.jinja (35 lines, Normal file)
@@ -0,0 +1,35 @@
#!/usr/bin/env bash

URL="{{ pillar['podman']['gotify']['url'] }}"
TOKEN="{{ pillar['podman']['gotify']['token'] }}"
TITLE="Updates on $HOSTNAME"
PRIORITY="{{ pillar['podman']['gotify']['priority'] }}"

{% raw -%}
# Pull the container's image and remember the container name if a newer image ID is available.
function check_update(){
  IFS=',' read -r -a container_info <<< "$(podman container inspect "$1" --format '{{ .Name }},{{ .ImageName }},{{ .Image }}')"

  podman pull "${container_info[1]}"
  if [[ "$(podman image inspect "${container_info[1]}" --format "{{.Id}}")" != "${container_info[2]}" ]]; then
    containers[${#containers[@]}]="${container_info[0]}"
  fi
}

IFS=$'\n'
for line in $(podman container ls -q); do
  check_update "$line"
done
if [[ "${#containers[@]}" == "0" ]]; then
  exit
fi

MESSAGE=$(cat << EOM
The following ${#containers[@]} container(s) have updates:
${containers[*]}
EOM
)

# Push the summary to Gotify.
curl "$URL/message?token=$TOKEN" -F "title=$TITLE" -F "priority=$PRIORITY" -F "message=$MESSAGE"
echo " "
{% endraw -%}
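The update-check script above takes its Gotify settings from pillar. A minimal pillar sketch that would satisfy those lookups; the host and values are illustrative placeholders, not taken from this commit:

podman:
  gotify:
    url: https://gotify.example.org
    token: AbCdEfGh123456
    priority: 5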
containers/files/check_image_updates.timer.jinja (9 lines, Normal file)
@@ -0,0 +1,9 @@
[Unit]
Description=Check for podman image updates timer

[Timer]
OnCalendar=Sun, 12:00
Unit=check_image_updates.service

[Install]
WantedBy=timers.target
containers/files/container.sh.jinja (144 lines, Normal file)
@@ -0,0 +1,144 @@
#!/usr/bin/env bash

function pull_image(){
  if ! podman image exists {{ args['image'] }}:{{ args['tag'] }}; then
    podman pull {{ args['image'] }}:{{ args['tag'] }}
  fi
}

function create_container() {
  if ! podman container exists {{ container }}; then
    podman container create \
      --name {{ container }} \
      {%- if args['podman_options'] is defined %}
      {%- for option, value in args['podman_options'].items() %}
      --{{ option }} {{ value }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['volumes'] is defined %}
      {%- for volume, mount in args['volumes'].items() %}
      -v {{ volume }}:{{ mount }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['ports'] is defined %}
      {%- for ports in args['ports'] %}
      -p {{ ports['host'] }}:{{ ports['container'] }}{% if ports['protocol'] is defined %}/{{ ports['protocol'] }}{% endif %} \
      {%- endfor %}
      {%- endif %}
      {%- if args['env'] is defined %}
      {%- for key, value in args['env'].items() %}
      -e {{ key }}={{ value }} \
      {%- endfor %}
      {%- endif %}
      {%- if args['devices'] is defined %}
      {%- for key, value in args['devices'].items() %}
      --device {{ key }}:{{ value }} \
      {%- endfor %}
      {%- endif %}
      {{ args['image'] }}:{{ args['tag'] }}{%- if args['run'] is defined %} \
      {{ args['run'] }}
      {%- endif %}
  fi
}

function generate_systemd_unit_file() {
  podman generate systemd --name {{ container }} > /etc/systemd/system/{{ container }}.service
}

function check_update() {
  podman pull {{ args['image'] }}:{{ args['tag'] }}
  if [[ "$(podman image inspect {{ args['image'] }}:{{ args['tag'] }} --format "{% raw %}{{.Id}}{% endraw %}")" == "$(podman inspect {{ container }} --format "{% raw %}{{ .Image }}{% endraw %}")" ]]; then
    echo "No image updates available"
    return 0
  else
    echo "Image update available"
    return 1
  fi
}

function update() {
  systemctl stop {{ container }}
  podman container rm {{ container }}
  create_container
  generate_systemd_unit_file
  systemctl daemon-reload
  systemctl enable --now {{ container }}.service
}

function printHelp(){
  cat << EOF
Usage ${0##*/} [options..]
 -h, -?, --help          Show help and exit
 -p, --pull              pull container image ({{ container }}:{{ args['tag'] }})
 -v, --volumes           create container volumes
 -c, --create            create the {{ container }} container
 -s, --start             start and enable the {{ container }} container
 -S, --stop              stop the {{ container }} container
 -i, --is-running        check whether the container service is running
 -u, --check-update      check if there are image updates available
 --update                perform an image update if one is available
 -g, --generate-systemd  generate the systemd service unit file
EOF
}

while :; do
  case $1 in
    -h|-\?|--help)
      printHelp
      exit
      ;;
    -p|--pull)
      pull_image
      shift
      ;;
    -v|--volumes)
      create_volumes
      shift
      ;;
    -c|--create)
      create_container
      shift
      ;;
    -s|--start)
      systemctl --user enable --now {{ container }}.service
      shift
      ;;
    -S|--stop)
      systemctl --user stop {{ container }}.service
      shift
      ;;
    -i|--is-running)
      systemctl --user is-active {{ container }}.service
      exit $?
      ;;
    -g|--generate-systemd)
      generate_systemd_unit_file
      shift
      ;;
    -u|--check-update)
      check_update
      shift
      ;;
    --update)
      update
      shift
      ;;
    --) # End of all options
      shift
      break
      ;;
    -?*)
      printf "'%s' is not a valid option\n" "$1" >&2
      exit 1
      ;;
    *) # Break out of case, no more options
      break
  esac
done
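container.sh.jinja is rendered once per container from a `container` name plus an `args` mapping; the template only reads the keys image, tag, podman_options, volumes, ports, env, devices and run. A hypothetical pillar entry exercising most of those keys (image name, port and values are placeholders, not taken from this commit):

containers:
  nodered:
    image: docker.io/nodered/node-red
    tag: latest
    podman_options:
      network: host
    volumes:
      /srv/nodered: /data
    ports:
      - host: 1880
        container: 1880
        protocol: tcp
    env:
      TZ: Europe/Stockholm
    devices:
      /dev/rfxcom: /dev/rfxcom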
containers/files/env_file.jinja (3 lines, Normal file)
@@ -0,0 +1,3 @@
{% for key, value in env_vars.items() -%}
{{ key }}={{ value }}
{% endfor -%}
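For illustration only: given an env_vars mapping such as {'TZ': 'Europe/Stockholm', 'PUID': '1000'} (hypothetical values), env_file.jinja renders a plain KEY=value file:

TZ=Europe/Stockholm
PUID=1000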
containers/files/loki-config.yaml (40 lines, Normal file)
@@ -0,0 +1,40 @@
auth_enabled: false

server:
  http_listen_port: 3100

ingester:
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 5m
  chunk_retain_period: 30s
  wal:
    dir: /data/wal

schema_config:
  configs:
    - from: 2020-05-15
      store: boltdb
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 168h

storage_config:
  boltdb:
    directory: /data/loki/index

  filesystem:
    directory: /data/loki/chunks

limits_config:
  enforce_metric_name: false
  reject_old_samples: true
  reject_old_samples_max_age: 168h
containers/files/mysql-dump.service.jinja (12 lines, Normal file)
@@ -0,0 +1,12 @@
{%- set user = salt['pillar.get']('podman:user', 'root') %}
{%- set home = salt['user.info'](user).home %}
[Unit]
Description=Dump all mariadb databases

[Service]
Type=oneshot
User={{ user }}
ExecStart={{ home }}/bin/mysql-dump.sh

[Install]
WantedBy=default.target
containers/files/mysql-dump.sh.jinja (15 lines, Normal file)
@@ -0,0 +1,15 @@
#!/bin/bash
umask 0077
BACKUP_DIR={{ pillar.containers.mariadb.backup_dir }}
databases=$(podman exec -it mariadb mysql -B -u root -p{{ pillar.containers.mariadb.env.MYSQL_ROOT_PASSWORD }} -e "SHOW DATABASES;" | tr -d "| " | grep -v Database)

for db in ${databases[@]}; do
  db=${db::-1}  # strip the trailing carriage return added by the pseudo-tty
  if [[ "$db" != "information_schema" ]] && [[ "$db" != "performance_schema" ]] && [[ "$db" != "mysql" ]] && [[ "$db" != _* ]] && [[ "$db" != "sys" ]]; then
    echo "Dumping database: $db"
    podman exec -it mariadb mysqldump -u root -p{{ pillar.containers.mariadb.env.MYSQL_ROOT_PASSWORD }} --databases $db | gzip > ${BACKUP_DIR}/$(date +"%Y-%m-%d_%H-%M-%S")_$db-sql.gz
  fi
done
# Delete dump files older than 3 days
find $BACKUP_DIR/* -type f -name "*-sql.gz" -mtime +3 -exec rm {} \;
containers/files/mysql-dump.timer.jinja (9 lines, Normal file)
@@ -0,0 +1,9 @@
[Unit]
Description=MariaDB dump timer

[Timer]
OnCalendar={{ pillar.containers.mariadb.OnCalendar }}
Unit=mysql-dump.service

[Install]
WantedBy=timers.target
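The dump script and timer above read their settings from the containers:mariadb pillar (backup_dir, env:MYSQL_ROOT_PASSWORD and OnCalendar). A hypothetical fragment with placeholder values:

containers:
  mariadb:
    backup_dir: /srv/backup/mariadb
    OnCalendar: "*-*-* 03:00:00"
    env:
      MYSQL_ROOT_PASSWORD: changeme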
containers/files/npm-container.conf (1 line, Normal file)
@@ -0,0 +1 @@
net.ipv4.ip_unprivileged_port_start=80
containers/files/prometheus/alert.node.yml (292 lines, Normal file)
@@ -0,0 +1,292 @@
groups:
  - name: node_exporter_alerts
    rules:
      - alert: Node down
        expr: up{job="monitoring-pi"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          title: Node {{ $labels.instance }} is down
          description: Failed to scrape {{ $labels.job }} on {{ $labels.instance }} for more than 2 minutes. Node seems down.

      - alert: HostOutOfMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of memory (instance {{ $labels.instance }})
          description: Node memory is filling up (< 10% left)\n VALUE = {{ $value }}

      - alert: HostMemoryUnderMemoryPressure
        expr: rate(node_vmstat_pgmajfault[1m]) > 1000
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host memory under memory pressure (instance {{ $labels.instance }})
          description: The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}

      - alert: HostUnusualNetworkThroughputIn
        expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual network throughput in (instance {{ $labels.instance }})
          description: Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualNetworkThroughputOut
        expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual network throughput out (instance {{ $labels.instance }})
          description: Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskReadRate
        expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk read rate (instance {{ $labels.instance }})
          description: Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskWriteRate
        expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk write rate (instance {{ $labels.instance }})
          description: Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}

      # Please add ignored mountpoints in node_exporter parameters like
      # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
      # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
      - alert: HostOutOfDiskSpace
        expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of disk space (instance {{ $labels.instance }})
          description: Disk is almost full (< 10% left)\n VALUE = {{ $value }}

      # Please add ignored mountpoints in node_exporter parameters like
      # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
      # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
      - alert: HostDiskWillFillIn24Hours
        expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host disk will fill in 24 hours (instance {{ $labels.instance }})
          description: Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}

      - alert: HostOutOfInodes
        expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of inodes (instance {{ $labels.instance }})
          description: Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}

      - alert: HostInodesWillFillIn24Hours
        expr: node_filesystem_files_free{mountpoint="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host inodes will fill in 24 hours (instance {{ $labels.instance }})
          description: Filesystem is predicted to run out of inodes within the next 24 hours at current write rate\n VALUE = {{ $value }}

      - alert: HostUnusualDiskReadLatency
        expr: rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk read latency (instance {{ $labels.instance }})
          description: Disk latency is growing (read operations > 100ms)\n VALUE = {{ $value }}

      - alert: HostUnusualDiskWriteLatency
        expr: rate(node_disk_write_time_seconds_total{device!~"mmcblk.+"}[1m]) / rate(node_disk_writes_completed_total{device!~"mmcblk.+"}[1m]) > 0.1 and rate(node_disk_writes_completed_total{device!~"mmcblk.+"}[1m]) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host unusual disk write latency (instance {{ $labels.instance }})
          description: Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}

      - alert: HostHighCpuLoad
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host high CPU load (instance {{ $labels.instance }})
          description: CPU load is > 80%\n VALUE = {{ $value }}

      - alert: HostCpuStealNoisyNeighbor
        expr: avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100 > 10
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }})
          description: CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}

      # 1000 context switches is an arbitrary number.
      # Alert threshold depends on nature of application.
      # Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
      - alert: HostContextSwitching
        expr: (rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host context switching (instance {{ $labels.instance }})
          description: Context switching is growing on node (> 1000 / s)\n VALUE = {{ $value }}

      - alert: HostSwapIsFillingUp
        expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host swap is filling up (instance {{ $labels.instance }})
          description: Swap is filling up (>80%)\n VALUE = {{ $value }}

      - alert: HostSystemdServiceCrashed
        expr: node_systemd_unit_state{state="failed"} == 1
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host SystemD service crashed (instance {{ $labels.instance }})
          description: SystemD service crashed\n VALUE = {{ $value }}

      - alert: HostPhysicalComponentTooHot
        expr: node_hwmon_temp_celsius > 75
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host physical component too hot (instance {{ $labels.instance }})
          description: Physical hardware component too hot\n VALUE = {{ $value }}

      - alert: HostNodeOvertemperatureAlarm
        expr: node_hwmon_temp_crit_alarm_celsius == 1
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Host node overtemperature alarm (instance {{ $labels.instance }})
          description: Physical node temperature alarm triggered\n VALUE = {{ $value }}

      - alert: HostRaidArrayGotInactive
        expr: node_md_state{state="inactive"} > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Host RAID array got inactive (instance {{ $labels.instance }})
          description: RAID array {{ $labels.device }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.\n VALUE = {{ $value }}

      - alert: HostRaidDiskFailure
        expr: node_md_disks{state="failed"} > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host RAID disk failure (instance {{ $labels.instance }})
          description: At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n VALUE = {{ $value }}

      - alert: HostOomKillDetected
        expr: increase(node_vmstat_oom_kill[1m]) > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host OOM kill detected (instance {{ $labels.instance }})
          description: OOM kill detected\n VALUE = {{ $value }}

      - alert: HostEdacCorrectableErrorsDetected
        expr: increase(node_edac_correctable_errors_total[1m]) > 0
        for: 0m
        labels:
          severity: info
        annotations:
          summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }})
          description: Instance has had {{ printf "%.0f" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}

      - alert: HostEdacUncorrectableErrorsDetected
        expr: node_edac_uncorrectable_errors_total > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})
          description: Instance has had {{ printf "%.0f" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkReceiveErrors
        expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host Network Receive Errors (instance {{ $labels.instance }}:{{ $labels.device }})
          description: Instance interface has encountered {{ printf "%.0f" $value }} receive errors in the last five minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkTransmitErrors
        expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host Network Transmit Errors (instance {{ $labels.instance }}:{{ $labels.device }})
          description: Instance has encountered {{ printf "%.0f" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}

      - alert: HostNetworkInterfaceSaturated
        expr: (rate(node_network_receive_bytes_total{device!~"^tap.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^tap.*"}[1m])) / node_network_speed_bytes{device!~"^tap.*"} > 0.8
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Host Network Interface Saturated (instance {{ $labels.instance }}:{{ $labels.interface }})
          description: The network interface is getting overloaded.\n VALUE = {{ $value }}

      - alert: HostConntrackLimit
        expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host conntrack limit (instance {{ $labels.instance }})
          description: The number of conntrack entries is approaching the limit\n VALUE = {{ $value }}

      - alert: HostClockSkew
        expr: (node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host clock skew (instance {{ $labels.instance }})
          description: Clock skew detected. Clock is out of sync.\n VALUE = {{ $value }}

      - alert: HostClockNotSynchronising
        expr: min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host clock not synchronising (instance {{ $labels.instance }})
          description: Clock not synchronising.\n VALUE = {{ $value }}
containers/files/prometheus/prometheus.yml (59 lines, Normal file)
@@ -0,0 +1,59 @@
# my global config #
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.2.0.22:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "alert.node.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "node"
    static_configs:
      - targets:
          - "poblano.rre.nu:9100"
          - "salt.rre.nu:9100"
          - "pepper.rre.nu:9100"
          - "woody.rre.nu:9100"
          - "serrano.rre.nu:9100"
          - "coronado.rre.nu:9100"
  - job_name: "unpoller"
    static_configs:
      - targets:
          - "unpoller.rre.nu:9130"
  - job_name: "fail2ban"
    static_configs:
      - targets:
          - "poblano.rre.nu:9191"
          - "salt.rre.nu:9191"
          - "pepper.rre.nu:9191"
  - job_name: "nginx"
    static_configs:
      - targets:
          - "10.2.0.22:9193"

  - job_name: "promtail"
    static_configs:
      - targets:
          - "serrano.rre.nu:9080"
          - "coronado.rre.nu:9080"
containers/files/promtail.conf.jinja (29 lines, Normal file)
@@ -0,0 +1,29 @@
server:
  http_listen_port: {{ http_listen_port }}
  grpc_listen_port: 0

positions:
  filename: /tmp/positions.yaml

clients:
  - url: "{{ client_url }}"

scrape_configs:
  - job_name: journal
    journal:
      max_age: 12h
      path: /var/log/journal
      labels:
        job: systemd-journal
    relabel_configs:
      - source_labels: ['__journal__systemd_unit']
        target_label: 'unit'
      - source_labels: ["__journal__hostname"]
        target_label: host
      - source_labels: ["__journal_priority_keyword"]
        target_label: level
      - source_labels: ["__journal_syslog_identifier"]
        target_label: syslog_identifier
      - source_labels: ["__journal_container_name"]
        target_label: container_name
containers/files/unpoller.conf (234 lines, Normal file)
@@ -0,0 +1,234 @@
# Unpoller v2 primary configuration file. TOML FORMAT #
###########################################################

[poller]
# Turns on line numbers, microsecond logging, and a per-device log.
# The default is false, but I personally leave this on at home (four devices).
# This may be noisy if you have a lot of devices. It adds one line per device.
debug = false

# Turns off per-interval logs. Only startup and error logs will be emitted.
# Recommend enabling debug with this setting for better error logging.
quiet = false

# Load dynamic plugins. Advanced use; only sample mysql plugin provided by default.
plugins = []

#### OUTPUTS

# If you don't use an output, you can disable it.

[prometheus]
disable = false
# This controls on which ip and port /metrics is exported when mode is "prometheus".
# This has no effect in other modes. Must contain a colon and port.
http_listen = "0.0.0.0:9130"
# Adding an SSL Cert and Cert Key will make Poller listen with SSL/https.
ssl_cert_path = ""
ssl_key_path = ""
# Errors are rare. Setting this to true will report them to Prometheus.
report_errors = false
## Record data for disabled or down (unlinked) switch ports.
dead_ports = false

[influxdb]
disable = true
# InfluxDB does not require auth by default, so the user/password are probably unimportant.
url = "http://127.0.0.1:8086"
user = "unifipoller"
# Password for InfluxDB user (above).
# If the password provided here begins with file:// then the password is read in from
# the file path that follows the file:// prefix. ex: file:///etc/influxdb/passwd.file
pass = "unifipoller"
# Be sure to create this database. See the InfluxDB Wiki page for more info.
db = "unifi"
# If your InfluxDB uses a valid SSL cert, set this to true.
verify_ssl = false
# The UniFi Controller only updates traffic stats about every 30 seconds.
# Setting this to something lower may lead to "zeros" in your data.
# If you're getting zeros now, set this to "1m"
interval = "30s"
## Record data for disabled or down (unlinked) switch ports.
dead_ports = false

# To enable output of UniFi Events to Loki, add a URL; it's disabled otherwise.
# User, pass and tenant_id are optional and most folks wont set them.
# Pick which logs you want per-controller in the [unifi.controller] section.
# This is a new feature. Feedback welcome!
[loki]
disable = false
url = "{{ pillar['containers']['unpoller']['loki_url'] }}"
# The rest of this is advanced & optional. See wiki.
user = ""
pass = ""
verify_ssl = false
tenant_id = ""
interval = "2m"
timeout = "10s"

[datadog]
# How often to poll UniFi and report to Datadog.
interval = "2m"

# To enable this output plugin
enable = false

# Datadog Custom Options

# address to talk to the datadog agent, by default this uses the local statsd UDP interface
# address = "localhost:8125"

# namespace to prepend to all data, default is no additional prefix.
# namespace = ""

# tags to append to all data
# tags = [ "customer:abc_corp" ]

# For more advanced options for very large amount of data collected see the upstream
# github.com/unpoller/unpoller/pkg/datadogunifi repository README.


# Unpoller has an optional web server. To turn it on, set enable to true. If you
# wish to use SSL, provide SSL cert and key paths. This interface is currently
# read-only; it just displays information, like logs, devices and clients.
# Notice: Enabling the web server with many sites will increase memory usage.
# This is a new feature and lacks a UI, enabling only recommended for testing.
[webserver]
enable = false
port = 37288
# The HTML path is different on Windows and BSD/macOS.
html_path = "/usr/lib/unifi-poller/web"
ssl_cert_path = ""
ssl_key_path = ""
# How many events per event group to hold. 200-2000. Use fewer with many sites.
# With 1 site, you'll have a max total of 9 event groups; 1 per plugin, 4 per site.
# Each site adds 1 event group for each of these inputs that is enabled:
# save_ids, save_events, save_anomalies, save_alarms.
max_events = 200

# By default the web interface does not require authentication. You can change
# that by adding a username and password hash (or multiple) below.
# To create a hash, run unifi-poller with the -e CLI argument. See Wiki for more!
[webserver.accounts]
# username = "password-hash"
# captain = "$2a$04$mxw6i0LKH6u46oaLK2cq5eCTAAFkfNiRpzNbz.EyvJZZWNa2FzIlS"

#### INPUTS

[unifi]
# Setting this to true and providing default credentials allows you to skip
# configuring controllers in this config file. Instead you configure them in
# your prometheus.yml config. Prometheus then sends the controller URL to
# Unpoller when it performs the scrape. This is useful if you have many,
# or changing controllers. See wiki for more.
dynamic = false

# The following section contains the default credentials/configuration for any
# dynamic controller (see above section), or the primary controller if you do not
# provide one and dynamic is disabled. In other words, you can just add your
# controller here and delete the following section. The internal defaults are
# shown below. Any missing values will assume these displayed defaults.

[unifi.defaults]
# URL for the UniFi Controller. Do not add any paths after the host:port.
# Do not use port 8443 if you have a UDM; just use "https://ip".
url = "{{ pillar['containers']['unpoller']['unifi_url'] }}"

# Make a read-only user in the UniFi Admin Settings, allow it access to all sites.
user = "{{ pillar['containers']['unpoller']['unifi_user'] }}"

# Password for UniFi controller user (above).
# If the password provided here begins with file:// then the password is read in from
# the file path that follows the file:// prefix. ex: file:///etc/unifi/password.file
# ex: file:///etc/unifi/passwd.file, windows: file://C:\\UserData\\Unifi\\Passwd.txt
pass = "{{ pillar['containers']['unpoller']['unifi_pass'] }}"

# If the controller has more than one site, specify which sites to poll here.
# Set this to ["default"] to poll only the first site on the controller.
# A setting of ["all"] will poll all sites; this works if you only have 1 site too.
sites = ["all"]

# Specify a timeout, leave missing to declare infinite wait. This determines the maximum
# time to wait for a response from the unifi controller on any API request.
# timeout = 60s

# Enable collection of site data. This data powers the Network Sites dashboard.
# It's not valuable to everyone and setting this to false will save resources.
save_sites = true

# Hash, with md5, client names and MAC addresses. This attempts to protect
# personally identifiable information. Most users won't want to enable this.
hash_pii = false

# Enable collection of Intrusion Detection System Data (InfluxDB/Loki only).
# Only useful if IDS or IPS are enabled on one of the sites. This may store
# a lot of information. Only recommended for testing and debugging. There
# may not be any dashboards to display this data. It can be used for annotations.
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_ids = false

# Enable collection of UniFi Events (InfluxDB/Loki only).
# This may store a lot of information. Only recommended for testing and debugging.
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_events = true

# Enable collection of UniFi Alarms (InfluxDB/Loki only).
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki. This will leak PII data!
save_alarms = true

# Enable collection of UniFi Anomalies (InfluxDB/Loki only).
# There are no dashboards to display this data. It can be used for annotations.
# This is a new (June, 2020) feature. Please provide feedback if you try it out!
# Enable this only if using InfluxDB or Loki.
save_anomalies = true

# Enable collection of Deep Packet Inspection data. This data breaks down traffic
# types for each client and site, it powers a dedicated DPI dashboard.
# Enabling this adds roughly 150 data points per client. That's 6000 metrics for
# 40 clients. This adds a little bit of poller run time per interval and causes
# more API requests to your controller(s). Don't let these "cons" sway you:
# it's cool data. Please provide feedback on your experience with this feature.
save_dpi = false

## Enabling save_rogue stores even more data in your time series databases.
## This saves neighboring access point metrics in a dedicated table or namespace.
save_rogue = false

# If your UniFi controller has a valid SSL certificate (like lets encrypt),
# you can enable this option to validate it. Otherwise, any SSL certificate is
# valid. If you don't know if you have a valid SSL cert, then you don't have one.
verify_ssl = false

## You may provide a list of SSL cert files (PEM format) that you expect your
## controller to use. As long as one of the certs you provide here shows up in
## the cert trust chain the controller presents it will be accepted and allowed.
## These files may be re-read while poller is running.
## Example: ssl_cert_paths = ["/path/to/cert.pem", "/another/cert.pem"]
ssl_cert_paths = []

# The following is optional and used for configurations with multiple UniFi controllers.

# You may repeat the following [[unifi.controller]] section as many times as needed to
# poll multiple controllers. Uncomment the entire section including [[unifi.controller]].
# Omitted variables will have their values taken from the defaults, above.
#
#[[unifi.controller]]
# url = "https://127.0.0.1:8443"
# user = "unifipoller"
# pass = "unifipoller"
# sites = ["all"]
# save_sites = true
# hash_pii = false
# save_ids = false
# save_events = false
# save_alarms = false
# save_anomalies = false
# save_dpi = false
# save_rogue = false
# verify_ssl = false
# ssl_cert_paths = []
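up.conf is rendered as a Jinja template and pulls four values from the containers:unpoller pillar. A hypothetical fragment with placeholder values, not taken from this commit:

containers:
  unpoller:
    loki_url: http://loki.rre.nu:3100
    unifi_url: https://unifi.example.lan:8443
    unifi_user: unifipoller
    unifi_pass: file:///etc/unifi/passwd.file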
containers/freeipa.sls (17 lines, Normal file)
@@ -0,0 +1,17 @@
{% from 'lib.sls' import container_deploy with context %}

Create freeipa root directory:
  file.directory:
    - name: /srv/freeipa
    - user: root
    - group: root
    - mode: "0755"

Create freeipa config directory:
  file.directory:
    - name: /srv/freeipa/data
    - user: root
    - group: root
    - mode: "0755"

{{ container_deploy('freeipa') }}
containers/gitea.sls (10 lines, Normal file)
@@ -0,0 +1,10 @@
{% from 'lib.sls' import container_deploy with context %}

Create gitea data directory:
  file.directory:
    - name: /srv/gitea
    - user: 1000
    - group: 1000
    - mode: "0750"

{{ container_deploy('gitea') }}
containers/gotify.sls (10 lines, Normal file)
@@ -0,0 +1,10 @@
{% from 'lib.sls' import container_deploy with context %}

Create gotify data directory:
  file.directory:
    - name: /srv/gotify
    - user: root
    - group: root
    - mode: "0755"

{{ container_deploy('gotify') }}
containers/grafana.sls (10 lines, Normal file)
@@ -0,0 +1,10 @@
{% from 'lib.sls' import container_deploy with context %}

Create grafana data directory:
  file.directory:
    - name: /srv/grafana
    - user: 472
    - group: root
    - mode: "0750"

{{ container_deploy('grafana') }}
containers/init.sls (46 lines, Normal file)
@@ -0,0 +1,46 @@
Copy check image update script:
  file.managed:
    - name: /root/bin/check_image_updates.sh
    - source: salt://containers/files/check_image_updates.sh.jinja
    - template: jinja
    - user: root
    - group: root
    - mode: "0700"

Create check image update service:
  file.managed:
    - name: /etc/systemd/system/check_image_updates.service
    - source: salt://containers/files/check_image_updates.service.jinja
    - template: jinja
    - user: root
    - group: root
    - mode: "0644"

Create check image update timer:
  file.managed:
    - name: /etc/systemd/system/check_image_updates.timer
    - source: salt://containers/files/check_image_updates.timer.jinja
    - template: jinja
    - user: root
    - group: root
    - mode: "0644"

Systemd daemon reload for image update:
  cmd.run:
    - name: systemctl daemon-reload
    - watch:
      - file: Create check image update service
      - file: Create check image update timer

Start check image update timer:
  service.running:
    - name: check_image_updates.timer
    - enable: True

{% if pillar.containers is defined %}
include:
{%- for container, args in pillar.containers.items() %}
  - containers.{{ container }}
{%- endfor %}
{%- endif %}
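The include loop at the end of init.sls turns every key under the containers pillar into a state include, so enabling a container is a matter of adding its pillar entry. With a hypothetical pillar such as:

containers:
  gotify:
    image: docker.io/gotify/server
    tag: latest
  loki:
    image: docker.io/grafana/loki
    tag: latest

the rendered highstate includes containers.gotify and containers.loki.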
containers/loki.sls (32 lines, Normal file)
@@ -0,0 +1,32 @@
{% from 'lib.sls' import container_deploy with context %}

Create loki data directory:
  file.directory:
    - name: /srv/loki
    - user: 10001
    - group: root
    - mode: "0755"

Create loki config directory:
  file.directory:
    - name: /etc/loki
    - user: 10001
    - group: root
    - mode: "0755"

Create loki configuration file:
  file.managed:
    - name: /etc/loki/config.yaml
    - source: salt://containers/files/loki-config.yaml
    - user: root
    - group: root
    - mode: "0644"

{{ container_deploy('loki') }}

Start loki service:
  service.running:
    - name: loki
    - enable: True
    - onchanges:
      - file: Create loki configuration file
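loki-config.yaml keeps the WAL, index and chunks under /data inside the container, while this state only prepares /srv/loki and /etc/loki on the host; the loki pillar entry is therefore assumed to bind-mount those paths. A hypothetical volumes fragment matching that assumption:

containers:
  loki:
    volumes:
      /srv/loki: /data
      /etc/loki: /etc/loki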
containers/mariadb.sls (10 lines, Normal file)
@@ -0,0 +1,10 @@
{% from 'lib.sls' import container_deploy with context %}

Create data directory for mariadb:
  file.directory:
    - name: /srv/mariadb
    - user: 999
    - group: 999
    - mode: "0755"

{{ container_deploy('mariadb') }}
containers/mosquitto.sls (17 lines, Normal file)
@@ -0,0 +1,17 @@
{% from 'lib.sls' import container_deploy with context %}

Create mosquitto configuration directory:
  file.directory:
    - name: /etc/mosquitto
    - user: 1883
    - group: 1883
    - mode: "0750"

Create mosquitto data directory:
  file.directory:
    - name: /srv/mosquitto
    - user: 1883
    - group: 1883
    - mode: "0750"

{{ container_deploy('mosquitto') }}
containers/nextcloud.sls (11 lines, Normal file)
@@ -0,0 +1,11 @@
{% from 'lib.sls' import container_deploy with context %}

Create nextcloud data directory:
  file.directory:
    - name: /srv/nextcloud
    - user: 33
    - group: 33
    - mode: "0755"

{{ container_deploy('nextcloud') }}
{{ container_deploy('nextcloud-cron') }}
containers/node_exporter.sls (3 lines, Normal file)
@@ -0,0 +1,3 @@
{% from 'lib.sls' import container_deploy with context %}

{{ container_deploy('node_exporter') }}
containers/nodered.sls (25 lines, Normal file)
@@ -0,0 +1,25 @@
{% from 'lib.sls' import container_deploy with context %}

Create udev rule for rfxcom usb dongle:
  file.managed:
    - name: /etc/udev/rules.d/99-rfxcom-serial.rules
    - source: salt://containers/files/99-rfxcom-serial.rules
    - user: root
    - group: root
    - mode: "0644"

Reload udev rules for rfxcom dongle:
  cmd.run:
    - name: udevadm control --reload-rules
    - onchanges:
      - file: Create udev rule for rfxcom usb dongle

Create data folder for nodered:
  file.directory:
    - name: /srv/nodered
    - user: 1000
    - group: 1000
    - mode: "0750"

{{ container_deploy('nodered') }}
containers/npm.sls (24 lines, Normal file)
@@ -0,0 +1,24 @@
{% from 'lib.sls' import container_deploy with context %}

Create npm root directory:
  file.directory:
    - name: /srv/npm
    - user: root
    - group: root
    - mode: "0755"

Create npm data directory:
  file.directory:
    - name: /srv/npm/data
    - user: root
    - group: root
    - mode: "0750"

Create npm letsencrypt directory:
  file.directory:
    - name: /srv/npm/letsencrypt
    - user: root
    - group: root
    - mode: "0750"

{{ container_deploy('npm') }}
containers/piwigo.sls (24 lines, Normal file)
@@ -0,0 +1,24 @@
{% from 'lib.sls' import container_deploy with context %}

Create piwigo root directory:
  file.directory:
    - name: /srv/piwigo
    - user: {{ pillar.containers.piwigo.env.PUID }}
    - group: {{ pillar.containers.piwigo.env.GUID }}
    - mode: "0750"

Create piwigo config directory:
  file.directory:
    - name: /srv/piwigo/config
    - user: {{ pillar.containers.piwigo.env.PUID }}
    - group: {{ pillar.containers.piwigo.env.GUID }}
    - mode: "0750"

Create piwigo gallery directory:
  file.directory:
    - name: /srv/piwigo/gallery
    - user: {{ pillar.containers.piwigo.env.PUID }}
    - group: {{ pillar.containers.piwigo.env.GUID }}
    - mode: "0750"

{{ container_deploy('piwigo') }}
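piwigo.sls takes the directory owner straight from the container's env pillar (PUID/GUID as spelled here). A hypothetical fragment with placeholder IDs:

containers:
  piwigo:
    env:
      PUID: 1000
      GUID: 1000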
containers/prometheus.sls (26 lines, Normal file)
@@ -0,0 +1,26 @@
{% from 'lib.sls' import container_deploy with context %}

Create prometheus config directory:
  file.recurse:
    - name: /etc/prometheus
    - source: salt://containers/files/prometheus
    - user: nobody
    - group: root
    - dir_mode: "0755"
    - file_mode: "0644"

Create prometheus data directory:
  file.directory:
    - name: /srv/prometheus
    - user: nobody
    - group: root
    - mode: "0755"

{{ container_deploy('prometheus') }}

Start prometheus service:
  service.running:
    - name: prometheus
    - enable: True
    - watch:
      - file: Create prometheus config directory
containers/promtail.sls (34 lines, Normal file)
@@ -0,0 +1,34 @@
{%- set client_url = "http://loki.rre.nu:3100/loki/api/v1/push" %}
{%- set http_listen_port = 9080 %}

{% from 'lib.sls' import container_deploy with context %}

Create promtail configuration folder:
  file.directory:
    - name: /etc/promtail
    - user: root
    - group: root
    - mode: "0755"

Create promtail configuration:
  file.managed:
    - name: /etc/promtail/promtail.conf
    - source: salt://containers/files/promtail.conf.jinja
    - template: jinja
    - user: root
    - group: root
    - mode: "0644"
    - require:
      - file: Create promtail configuration folder
    - context:
        client_url: {{ client_url }}
        http_listen_port: {{ http_listen_port }}

{{ container_deploy('promtail') }}

Start promtail service:
  service.running:
    - name: promtail.service
    - enable: True
    - watch:
      - file: Create promtail configuration
containers/salt.sls (3 lines, Normal file)
@@ -0,0 +1,3 @@
{% from 'lib.sls' import container_deploy with context %}

{{ container_deploy('salt') }}
containers/unifi.sls (10 lines, Normal file)
@@ -0,0 +1,10 @@
{% from 'lib.sls' import container_deploy with context %}

Create unifi data directory:
  file.directory:
    - name: /srv/unifi
    - user: root
    - group: root
    - mode: "0750"

{{ container_deploy('unifi') }}
containers/unpoller.sls (20 lines, Normal file)
@@ -0,0 +1,20 @@
{% from 'lib.sls' import container_deploy with context %}

create config directory for unpoller:
  file.directory:
    - name: /srv/unpoller
    - user: root
    - group: root
    - mode: "0750"

create unpoller config:
  file.managed:
    - name: /srv/unpoller/up.conf
    - source: salt://containers/files/unpoller.conf
    - template: jinja
    - user: root
    - group: root
    - mode: "0640"

{{ container_deploy('unpoller') }}
containers/zigbee2mqtt.sls (25 lines, Normal file)
@@ -0,0 +1,25 @@
{% from 'lib.sls' import container_deploy with context %}

Create udev rule for zigbee usb dongle:
  file.managed:
    - name: /etc/udev/rules.d/99-zigbee-serial.rules
    - source: salt://containers/files/99-zigbee-serial.rules
    - user: root
    - group: root
    - mode: "0644"

Reload udev rules for zigbee dongle:
  cmd.run:
    - name: udevadm control --reload-rules
    - onchanges:
      - file: Create udev rule for zigbee usb dongle

Create zigbee2mqtt data folder:
  file.directory:
    - name: /srv/zigbee2mqtt
    - user: root
    - group: root
    - mode: "0755"

{{ container_deploy('zigbee2mqtt') }}