Skip to content
Snippets Groups Projects
Verified Commit 98a99f85 authored by Sheogorath's avatar Sheogorath :european_castle:
Browse files

feat(nut-exporter): Deploy initial version of nut-exporter

Currently the UPS of the cluster is not really monitored, therefore this
patch adds nut-exporter to integrate the UPS status in the regular
prometheus metrics. This should allow creating fancy dashboards and
calculating the power usage of the infrastructure.
parent 98a4e5e1
No related branches found
No related tags found
No related merge requests found
Showing
with 1580 additions and 0 deletions
# Base kustomization for nut-exporter. Everything is placed in the
# nut-exporter namespace; the shared allow-from-monitoring NetworkPolicy is
# pulled in as a resource and then adjusted by the networkpolicy.yaml patch.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: nut-exporter

resources:
  - namespace.yaml
  - release.yaml
  - ../../../shared/networkpolicies/allow-from-monitoring.yaml

patchesStrategicMerge:
  - networkpolicy.yaml
# Namespace for the exporter. The Pod Security Admission labels select the
# "restricted" profile, pinned to v1.27, for the enforce, audit and warn modes.
apiVersion: v1
kind: Namespace
metadata:
  name: nut-exporter
  labels:
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/enforce-version: v1.27
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/audit-version: v1.27
    pod-security.kubernetes.io/warn: restricted
    pod-security.kubernetes.io/warn-version: v1.27
---
# Service account referenced by the HelmRelease via spec.serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: nut-exporter
  name: flux-reconciler
---
# Binds the built-in "admin" ClusterRole to the flux-reconciler service
# account. Being a RoleBinding, the grant is scoped to this namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: nut-exporter
  name: flux-reconciler
subjects:
  - kind: ServiceAccount
    namespace: nut-exporter
    name: flux-reconciler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: admin
---
# Strategic-merge patch for the shared allow-from-monitoring policy: sets the
# pod selector to the exporter pods (matched by app.kubernetes.io/name).
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-from-monitoring
spec:
  podSelector:
    matchLabels:
      app.kubernetes.io/name: nut-exporter
# Flux HelmRelease installing the in-repo chart ./charts/nut-exporter from the
# flux-system GitRepository, reconciled with the flux-reconciler account.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: nut-exporter
  namespace: nut-exporter
spec:
  releaseName: nut-exporter
  serviceAccountName: flux-reconciler
  interval: 5m
  timeout: 5m
  chart:
    spec:
      chart: ./charts/nut-exporter
      sourceRef:
        kind: GitRepository
        name: flux-system
        namespace: flux-system
  # Values are layered in list order: the base ConfigMap first, then the
  # optional override Secret.
  valuesFrom:
    - kind: ConfigMap
      name: nut-exporter-base-values
      valuesKey: values.yaml
    - kind: Secret
      name: nut-exporter-override-values
      valuesKey: values-overrides.yaml
      optional: true
  # NOTE(review): a negative retry count presumably means "retry without
  # limit" - confirm against the Flux HelmRelease API reference.
  install:
    remediation:
      retries: -1
  upgrade:
    remediation:
      retries: -1
---
# Base Helm values consumed by the HelmRelease (key: values.yaml). Every
# exporter setting is read from the nut-credentials Secret so that no
# username, password or server address lives in this ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nut-exporter-base-values
  namespace: nut-exporter
data:
  values.yaml: |
    env:
      - name: NUT_EXPORTER_USERNAME
        valueFrom:
          secretKeyRef:
            name: nut-credentials
            key: username
      - name: NUT_EXPORTER_PASSWORD
        valueFrom:
          secretKeyRef:
            name: nut-credentials
            key: password
      - name: NUT_EXPORTER_SERVER
        value: null
        valueFrom:
          secretKeyRef:
            name: nut-credentials
            key: server
# Overlay kustomization: the nut-exporter base plus the local values/secret
# manifest and the shared default ResourceQuota.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: nut-exporter

resources:
  - ../../base/nut-exporter
  - nut-exporter-values.yaml
  - ../../../shared/resourcequotas/default.yaml
# Credentials for the exporter; the base values reference the keys
# username, password and server of this Secret through secretKeyRef.
# The stringData values are SOPS-encrypted (see the `sops:` stanza below).
# NOTE(review): change values with the sops CLI rather than by hand - the
# stanza carries a MAC, so manual edits presumably fail decryption; confirm.
apiVersion: v1
kind: Secret
metadata:
name: nut-credentials
namespace: nut-exporter
type: Opaque
stringData:
username: ENC[AES256_GCM,data:J4LuTco4,iv:TRE7Og6jXBm0eX+Vu25ctcy0nYl4hpT1qE4UgH685Yg=,tag:ZXinZcEy+3I2sVV8emc+pQ==,type:str]
password: ENC[AES256_GCM,data:qk3pIxhcGTdWUiMmNj4dzRPaMHc/GsiVwCFhYrWNbxxLwwCARokwUQ==,iv:UBSKLK75ixaS1Vso6hehhT8KOzoyZSBs2fYiQOPn4Pg=,tag:UPy19Ce1/ytefletuTZGLA==,type:str]
server: ENC[AES256_GCM,data:IFR/piHcieARr+rtzYvaOggnJezN+/gP,iv:T7UWUIk0xCvWak47ZEXpc2VjQYg4psJezG1SODgvEck=,tag:detvjcMN3CMwW6zGnMO5QQ==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-10-07T23:18:35Z"
mac: ENC[AES256_GCM,data:XsgCSpjxMkuxB4EsrV3XRpYkLAg0TKFMaqukg1Q1xwx22uiIRYz181e1XPGsk+ooLFggFrqST5LTU+tfv4fXO2IBhV8xAxG/S9bvHmFgup4XqAGopIh/9spVUyBoWsW769f/CGvR+IOFY+l+QzWoiVC62Dit1FOZetjQ9pPQkNw=,iv:urh6+qKhkE5hVTYEl3oVdaIK/1vQGqDeuR4k7Z1CFRU=,tag:uCHIR95VpyHJYMQGJB4hNg==,type:str]
pgp:
- created_at: "2022-03-22T22:26:35Z"
enc: |-
-----BEGIN PGP MESSAGE-----
wcFMA7kpg2bgzVHcAQ/+O79bVMXVw/wwdUnFvIEbvuhXm+SszL43WgzOley9ufSX
ItiDnlveogDn/vP5gTB0+kjVWwkZLE0gCQDiKX6hb+yyL9gZ1t/hARzd0K+q2Bau
UCJGrudH8SxfC4k4xPrvnH3QKXuencDXWwv+5k/JSXley90Fqf2lAbMy2fp+T8I7
P85LK2kqabyfWXAirIWwtEdoa4YmzhkD3vQQMv5g1FYuMAMnwVftOSUGqP0E9q/e
rSmS9j5tZ+u54uMwjj7FACCuq6tkn0vNynSfgXRPOXfY5e/unhJF5yCKJvvK5K4w
er1d74lb9U6Mq+27SPbMHswZ8oE/u4CXdP6CpzM4Xen5rLi2kK0IRDQNYEmfwdb/
DChJyklzVGJ2GU6dQl6YvEZ1Hqce4n2M2+WO14tH5xkU6hS7UT8TolOkon04e8V5
m2UAZKm52stE9/Rn5f3lRlu7dYFqlz740GwXF4OWLuqWEEEcxrN7Ii7u8uxe7Grl
CrZ3ukU0ZigrifnG6qs6QsBd1pupOgJ/qffYAm3VnJnrXN46b1OjQgtD17LapCag
jyrSus+pA1GBlYNDMjUNbg4i0ZpMmjzOL053BODt1nuepxnZH5C0YatEeyVwHEdV
lX76sadvQxk/Dxq+PvKcY95k2FXT1ddG9F2RbRsuXgXUlHfQYL9/nV39ll6ADGrS
UQE5FSLYK4f7q3aB/lqupOSGf4N+SISIHRUES45BYNs89iolFhHg6LpdcjYsYWPG
VqAYNeZuyPjAln1+UlcPrQ+rgq5xX5wz0G9M5w8h16waPQ==
=WHaH
-----END PGP MESSAGE-----
fp: 286791FB6648539775DB31B8FCB98C2A3EC6F601
- created_at: "2022-03-22T22:26:35Z"
enc: |
-----BEGIN PGP MESSAGE-----
hQIMA4oYbIHZIrAPAQ/+J8RluxthuZjThUOoiZRWXNcgPnYTLTGxODE7xkYob7Nn
rC2GrdU8C83F2mxiP5D2xdekxFmEh0w3GGskXu11ZwKbf3Olx4Y0zoIQee2gj1rL
qDViXSjcX03LAzg6cKjC9vgs1Gyb0zNOtVWolZWXLLn18HuR3anuxqP3YEBk0KaQ
Cd6++D7e30ykbfqnkyiq63x6syp7ynPYxKwZDb3pRDpvLwrYXavzpEZVL+ELndIs
atnOwnRyOMys0R/XyeGN8XBlPhgIQ5+cIeB7xgvZpaOBp3lJexSgKhkWCw3eNOAp
qeoAh4qr0bDVUkxqK4TyCCg+gu5kjMihVgEeCbaf6brJazfO8U+Su4BmteLPuHHJ
229UonVOsRBqUCZHYtIJhDpAsWkWPrgFayr/ysBiOL6ZwZ541PqafTkg/r7AytkU
WCEdv3VQm/j8S4blIP5ieOMx2vIt8DltXM7heInjPDFd3OazRIa2RA5z2/Tsjil1
3UJ3S9g/10O/wh1AK45WQJKfQNoz/KloQJNlbhBiib8T2Ks+W3NMdYbiN+4+OpBC
ZSc+1/S2BcRMRDq9Dfx954ZBmF4ez549E/RwG60DVulSOf/cgAApUN4o/krhR20s
l6HT7gSPA6yEHCcZG9YXUPssGrBySbAYtds9ckFIZAPKg1m7EfnUb1jBgycFQUXU
aAEJAhDyVhnQg409kDiBw79BXUBssQi8Udov0EgljRe2xWthHrrcIsewpTUCtaYg
6F6Cnc1aNoU99mSqCxkCeoqYtOhWtvkX7FpbI5wHJIe5H0SGg+2KVpJbzJb4/N4e
TT3xaw4nSwo/
=cOEc
-----END PGP MESSAGE-----
fp: B137EE1549DFAF960DD1E2B15147025FB9F09E07
encrypted_regex: ^(data|stringData|email|dnsZones?|dnsNames?|hosts?|tang|externalURL|.*-secret|.*-url|.*Secrets?|.*-domain|password|subjects|node|apiURL|.*(S|s)erverNames?|.*SecretKey)$
version: 3.7.3
# Helm chart metadata for nut-exporter.
# apiVersion v2 is the Helm 3 chart format. The `type` field below only
# exists in that format; `helm lint` rejects it on apiVersion v1 charts.
apiVersion: v2
name: nut-exporter
description: Installs NUT exporter in Kubernetes
type: application
version: 0.1.0
# Quoted so it is always read as a string; the Deployment template falls back
# to this value when image.tag is empty.
appVersion: "3.0.0"
keywords:
  - Network UPS Tools
  - prometheus-exporter
sources:
  - https://git.shivering-isles.com/shivering-isles/infrastructure-gitops/-/tree/main/charts/nut-exporter
  - https://github.com/DRuggeri/nut_exporter
  - https://github.com/acolombier/nut_exporter/tree/feat/add-helm-chart
annotations:
  artifacthub.io/category: monitoring-logging
# nut-exporter
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.0.0](https://img.shields.io/badge/AppVersion-3.0.0-informational?style=flat-square)
Installs NUT exporter in Kubernetes
## Source Code
* <https://git.shivering-isles.com/shivering-isles/infrastructure-gitops/-/tree/main/charts/nut-exporter>
* <https://github.com/DRuggeri/nut_exporter>
* <https://github.com/acolombier/nut_exporter/tree/feat/add-helm-chart>
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| dashboard | object | `{"enabled":true,"labels":{"grafana_dashboard":"1"}}` | Deploys a Grafana dashboard as a configmap |
| env | list | `[{"name":"NUT_EXPORTER_SERVER","value":"192.0.2.1"}]` | environment variables for nut_exporter |
| extraArgs | list | `[]` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"ghcr.io/druggeri/nut_exporter"` | |
| image.tag | string | `""` | |
| nodeSelector | object | `{}` | |
| podMonitor | object | `{"enabled":true,"labels":{},"relabelings":[]}` | Enables podMonitor object for prometheus-operator based setups |
| podSecurityContext.runAsGroup | int | `3642` | |
| podSecurityContext.runAsNonRoot | bool | `true` | |
| podSecurityContext.runAsUser | int | `3642` | |
| podSecurityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
| resources.limits.cpu | string | `"200m"` | |
| resources.limits.memory | string | `"128Mi"` | |
| resources.requests.cpu | string | `"50m"` | |
| resources.requests.memory | string | `"24Mi"` | |
| rules | object | `{"enabled":true,"labels":{},"rules":[{"alert":"UPSBatteryNeedsReplacement","annotations":{"message":"{{ $labels.ups }} is indicating a need for a battery replacement."},"expr":"network_ups_tools_ups_status{flag=\"RB\"} != 0","for":"60s","labels":{"severity":"high"}},{"alert":"UPSLowBattery","annotations":{"message":"{{ $labels.ups }} has low battery and is running on backup. Expect shutdown soon"},"expr":"network_ups_tools_ups_status{flag=\"LB\"} == 0 and network_ups_tools_ups_status{flag=\"OL\"} == 0","for":"60s","labels":{"severity":"critical"}},{"alert":"UPSRuntimeShort","annotations":{"message":"{{ $labels.ups }} has only {{ $value | humanizeDuration}} of battery autonomy"},"expr":"network_ups_tools_battery_runtime < 300","for":"30s","labels":{"severity":"high"}},{"alert":"UPSMainPowerOutage","annotations":{"message":"{{ $labels.ups }} has no main power and is running on backup."},"expr":"network_ups_tools_ups_status{flag=\"OL\"} == 0","for":"60s","labels":{"severity":"critical"}},{"alert":"UPSIndicatesWarningStatus","annotations":{"message":"{{ $labels.ups }} is indicating a need for a battery replacement."},"expr":"network_ups_tools_ups_status{flag=\"HB\"} != 0","for":"60s","labels":{"severity":"warning"}}]}` | Prometheus rules to trigger alerts from UPS |
| securityContext.allowPrivilegeEscalation | bool | `false` | |
| securityContext.capabilities.drop[0] | string | `"ALL"` | |
| securityContext.readOnlyRootFilesystem | bool | `true` | |
| tolerations | list | `[]` | |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
This diff is collapsed.
{{/*
Base resource name: the Helm release name suffixed with "-nut-exporter".
The Deployment, PodMonitor, PrometheusRule and dashboard ConfigMap templates
derive their metadata.name from it.
*/}}
{{- define "nutexporter.name" -}}
{{- .Release.Name }}-nut-exporter
{{- end }}
{{/*
Namespace-qualified name: "<release namespace>-<nutexporter.name>".
NOTE(review): no template visible in this chart includes it - confirm it is
still needed.
*/}}
{{- define "nutexporter.fullName" -}}
{{- .Release.Namespace }}-{{ include "nutexporter.name" . }}
{{- end }}
{{/*
Common labels: the selector labels plus managed-by (the Helm release
service), part-of and the chart version. Meant for metadata.labels of every
rendered object; callers indent the result with nindent.
*/}}
{{- define "nutexporter.labels" -}}
{{ include "nutexporter.selectorLabels" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/part-of: nut-exporter
version: {{ .Chart.Version }}
{{- end }}
{{/*
Selector labels: component, instance (the release name) and name. Used as
the Deployment's selector and pod labels, and as the PodMonitor's selector,
so the three must stay in sync through this single definition.
*/}}
{{- define "nutexporter.selectorLabels" -}}
app.kubernetes.io/component: server
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/name: nut-exporter
{{- end }}
\ No newline at end of file
{{- /*
Grafana dashboard ConfigMap, rendered only when dashboard.enabled is true.
The dashboard JSON is read from dashboards/default.json inside the chart.
*/ -}}
{{- if .Values.dashboard.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "nutexporter.name" . }}-dashboards
  labels:
    {{- include "nutexporter.labels" . | nindent 4 }}
    {{- /* Guarded: an empty map would otherwise render a stray "{}" here. */}}
    {{- with .Values.dashboard.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
data:
  {{- /* nindent supplies the newline that follows the block-scalar header. */}}
  nutdashboard.json: |-
    {{- $.Files.Get "dashboards/default.json" | nindent 4 }}
{{- end }}
\ No newline at end of file
{{- /*
Single-replica Deployment of the nut_exporter container. The pod is replaced
with the Recreate strategy; optional sections are emitted only when the
matching value is set.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "nutexporter.name" . }}
  labels:
    {{- include "nutexporter.labels" . | nindent 4 }}
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      {{- include "nutexporter.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "nutexporter.selectorLabels" . | nindent 8 }}
    spec:
      {{- /* Pod-level settings */}}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: nut-exporter
          {{- /* An empty image.tag falls back to the chart's appVersion. */}}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          {{- with .Values.extraArgs }}
          args:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.env }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: http
              containerPort: 9199
              protocol: TCP
          {{- /* Both probes request the UPS metrics endpoint on the http port. */}}
          readinessProbe:
            httpGet:
              path: /ups_metrics
              port: http
            initialDelaySeconds: 5
            timeoutSeconds: 2
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /ups_metrics
              port: http
            initialDelaySeconds: 10
            timeoutSeconds: 2
            failureThreshold: 5
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
{{- /*
PodMonitor for prometheus-operator setups, rendered only when
podMonitor.enabled is true. It scrapes /ups_metrics on the "http" port of the
exporter pods in the release namespace.

All settings are read from .Values.podMonitor. The chart's values define no
"serviceMonitor" key, so reading labels/relabelings from it made rendering
fail with a nil-pointer error.
*/ -}}
{{- if .Values.podMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: {{ include "nutexporter.name" . }}
  labels:
    {{- include "nutexporter.labels" . | nindent 4 }}
    {{- with .Values.podMonitor.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  jobLabel: nut-exporter
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
  selector:
    matchLabels:
      {{- include "nutexporter.selectorLabels" . | nindent 6 }}
  podMetricsEndpoints:
    - interval: 15s
      path: /ups_metrics
      port: http
      scheme: http
      {{- /* Optional; a key missing from podMonitor is simply skipped. */}}
      {{- with .Values.podMonitor.metricRelabelings }}
      metricRelabelings:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.podMonitor.relabelings }}
      relabelings:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
{{- /*
PrometheusRule carrying the alert definitions from .Values.rules.rules in a
single group named NutExporter; rendered only when rules.enabled is true.
*/ -}}
{{- if .Values.rules.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "nutexporter.name" . }}-rules
  labels:
    {{- include "nutexporter.labels" . | nindent 4 }}
    {{- if .Values.rules.labels }}
    {{- toYaml .Values.rules.labels | nindent 4 }}
    {{- end }}
spec:
  groups:
    - name: NutExporter
      rules:
        {{- toYaml .Values.rules.rules | nindent 8 }}
{{- end }}
# Container image. An empty tag makes the Deployment template fall back to the
# chart's appVersion.
image:
  repository: ghcr.io/druggeri/nut_exporter
  tag: ""
  pullPolicy: IfNotPresent

# -- Deploys a Grafana dashboard as a configmap
dashboard:
  enabled: true
  labels:
    grafana_dashboard: "1"

# -- Enables podMonitor object for prometheus-operator based setups
podMonitor:
  enabled: true
  labels: {}
  #  key: value
  relabelings: []
  #  - replacement: "My UPS"
  #    targetLabel: ups

# Additional command-line arguments, passed as the container's args.
extraArgs: []
#  - --log.level=debug
# -- environment variables for nut_exporter
env:
  # Credentials: either unset them (value: null) or read them from a Secret.
  #  - name: NUT_EXPORTER_USERNAME
  #    value: null
  #  - name: NUT_EXPORTER_USERNAME
  #    valueFrom:
  #      secretKeyRef:
  #        name: nut-credentials
  #        key: username
  #  - name: NUT_EXPORTER_PASSWORD
  #    value: null
  #  - name: NUT_EXPORTER_PASSWORD
  #    valueFrom:
  #      secretKeyRef:
  #        name: nut-credentials
  #        key: password
  #  - name: NUT_EXPORTER_VARIABLES
  #    value: "battery.charge,battery.runtime,battery.voltage,battery.voltage.nominal,input.voltage,input.voltage.nominal,ups.load,ups.status"
  - name: NUT_EXPORTER_SERVER
    value: "192.0.2.1"
  #  - name: NUT_EXPORTER_DISABLE_DEVICE_INFO
  #    value: "false"
  #  - name: NUT_EXPORTER_ON_REGEX
  #    value: "^(enable|enabled|on|true|active|activated)$"
  #  - name: NUT_EXPORTER_OFF_REGEX
  #    value: "^(disable|disabled|off|false|inactive|deactivated)$"
  #  - name: NUT_EXPORTER_STATUSES
  #    value: "OL,OB,LB,HB,RB,CHRG,DISCHRG,BYPASS,CAL,OFF,OVER,TRIM,BOOST,FSD,SD"
  #  - name: NUT_EXPORTER_METRICS_NAMESPACE
  #    value: "network_ups_tools"
  #  - name: NUT_EXPORTER_WEB_TELEMETRY_PATH
  #    value: "/ups_metrics"
  #  - name: NUT_EXPORTER_WEB_EXPORTER_TELEMETRY_PATH
  #    value: "/metrics"
  #  - name: NUT_EXPORTER_PRINT_METRICS
  #    value: "false"
# Container-level security context.
securityContext:
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  capabilities:
    drop:
      - ALL

# Pod-level security context.
podSecurityContext:
  runAsNonRoot: true
  runAsUser: 3642
  runAsGroup: 3642
  seccompProfile:
    type: RuntimeDefault

# Container resource requests and limits.
resources:
  requests:
    cpu: "50m"
    memory: "24Mi"
  limits:
    cpu: "200m"
    memory: "128Mi"

# Scheduling constraints; both are empty by default.
nodeSelector: {}
#  has-ups-server: yes

tolerations: []
#  - key: node-role.kubernetes.io/master
#    operator: "Exists"
#    effect: NoSchedule
# -- Prometheus rules to trigger alerts from UPS
rules:
  enabled: true
  labels: {}
  #  key: value
  rules:
    # Status flags are exported as one series per flag; a non-zero value means
    # the flag is currently set.
    - alert: UPSBatteryNeedsReplacement
      annotations:
        message: '{{ $labels.ups }} is indicating a need for a battery replacement.'
      expr: network_ups_tools_ups_status{flag="RB"} != 0
      for: 60s
      labels:
        severity: high
    # Fires when the low-battery flag is set while the UPS is not online.
    # `and` only pairs series whose label sets are identical, and the two
    # selectors differ in `flag`, so that label has to be ignored for the
    # match to ever succeed.
    - alert: UPSLowBattery
      annotations:
        message: '{{ $labels.ups }} has low battery and is running on backup. Expect shutdown soon'
      expr: network_ups_tools_ups_status{flag="LB"} != 0 and ignoring(flag) network_ups_tools_ups_status{flag="OL"} == 0
      for: 60s
      labels:
        severity: critical
    - alert: UPSRuntimeShort
      annotations:
        message: '{{ $labels.ups }} has only {{ $value | humanizeDuration}} of battery autonomy'
      expr: network_ups_tools_battery_runtime < 300
      for: 30s
      labels:
        severity: high
    - alert: UPSMainPowerOutage
      annotations:
        message: '{{ $labels.ups }} has no main power and is running on backup.'
      expr: network_ups_tools_ups_status{flag="OL"} == 0
      for: 60s
      labels:
        severity: critical
    # HB is the "high battery" status flag, so the message names that flag
    # instead of repeating the battery-replacement (RB) text.
    - alert: UPSIndicatesWarningStatus
      annotations:
        message: '{{ $labels.ups }} is reporting a high battery (HB) status.'
      expr: network_ups_tools_ups_status{flag="HB"} != 0
      for: 60s
      labels:
        severity: warning
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment