From a089ac60fac690bdcda0cd932f0887ba1728a7bd Mon Sep 17 00:00:00 2001 From: Sheogorath <sheogorath@shivering-isles.com> Date: Sun, 10 Apr 2022 16:03:02 +0200 Subject: [PATCH] feat(cert-manager): Add dashboard and alerts This patch adds various cert-manager alerts and dashboards; these are inspired by uneeq-oss and monitoring-mixins on GitHub. References: https://github.com/monitoring-mixins/website --- infrastructure/cert-manager/alerts.yaml | 64 + .../cert-manager/dashboards/cert-manager.json | 1203 +++++++++++++++++ .../cert-manager/kustomization.yaml | 8 + 3 files changed, 1275 insertions(+) create mode 100644 infrastructure/cert-manager/alerts.yaml create mode 100644 infrastructure/cert-manager/dashboards/cert-manager.json diff --git a/infrastructure/cert-manager/alerts.yaml b/infrastructure/cert-manager/alerts.yaml new file mode 100644 index 000000000..e056dac6f --- /dev/null +++ b/infrastructure/cert-manager/alerts.yaml @@ -0,0 +1,64 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-cert-manager-rules + namespace: cert-manager +spec: + groups: + - name: cert-manager + rules: + - alert: CertManagerAbsent + annotations: + description: New certificates will not be able to be minted, and existing ones + can't be renewed until cert-manager is back. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent + summary: Cert Manager has disappeared from Prometheus service discovery. + expr: absent(up{job="cert-manager"}) + for: 10m + labels: + severity: critical + - name: certificates + rules: + - alert: CertManagerCertExpirySoon + annotations: + description: The domain that this cert covers will be unavailable after {{ $value + | humanizeDuration }}. Clients using endpoints that this cert protects will + start to fail in {{ $value | humanizeDuration }}. 
+ runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon + summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from + expiry, it should have renewed over a week ago. + expr: | + avg by (exported_namespace, namespace, name) ( + certmanager_certificate_expiration_timestamp_seconds - time() + ) < (21 * 24 * 3600) # 21 days in seconds + for: 1h + labels: + severity: warning + - alert: CertManagerCertNotReady + annotations: + description: This certificate has not been ready to serve traffic for at least + 10m. If the cert is being renewed or there is another valid cert, the ingress + controller _may_ be able to serve that instead. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready + summary: The cert `{{ $labels.name }}` is not ready to serve traffic. + expr: | + max by (name, exported_namespace, namespace, condition) ( + certmanager_certificate_ready_status{condition!="True"} == 1 + ) + for: 10m + labels: + severity: critical + - alert: CertManagerHittingRateLimits + annotations: + description: Depending on the rate limit, cert-manager may be unable to generate + certificates for up to a week. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits + summary: Cert manager hitting LetsEncrypt rate limits. 
+ expr: | + sum by (host) ( + rate(certmanager_http_acme_client_request_count{status="429"}[5m]) + ) > 0 + for: 5m + labels: + severity: critical diff --git a/infrastructure/cert-manager/dashboards/cert-manager.json b/infrastructure/cert-manager/dashboards/cert-manager.json new file mode 100644 index 000000000..2d6608b3f --- /dev/null +++ b/infrastructure/cert-manager/dashboards/cert-manager.json @@ -0,0 +1,1203 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": 59, + "iteration": 1616445892702, + "links": [ ], + "panels": [ + { + "datasource": "$datasource", + "description": "The number of certificates in the ready state.", + "fieldConfig": { + "defaults": { + "custom": { }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "True" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "sum by (condition) (certmanager_certificate_ready_status)", + "interval": "", + "legendFormat": "{{condition}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Certificates Ready", + "type": "stat" + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { }, + "decimals": 1, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 604800 + }, + { + "color": "green", + "value": 1209600 + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "min(certmanager_certificate_expiration_timestamp_seconds > 0) - time()", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "vector(1250000)", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Soonest Cert Expiry", + "type": "stat" + }, + { + "datasource": "$datasource", + "description": "Status of the certificates. 
Values are True, False or Unknown.", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "filterable": false + }, + "mappings": [ + { + "from": "", + "id": 0, + "operator": "", + "text": "Yes", + "to": "", + "type": 1, + "value": "" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ready Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Valid Until" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Valid Until" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 9, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Valid Until" + } + ] + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Certificates", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "namespaced_name" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + 
"Time 1": true, + "Time 2": true, + "Value #A": true, + "exported_namespace": false, + "exported_namespace 1": false, + "exported_namespace 2": true, + "name 1": true, + "namespace 2": true, + "namespaced_name": true + }, + "indexByName": { + "Time 1": 8, + "Time 2": 10, + "Value #A": 6, + "Value #B": 5, + "condition": 4, + "exported_namespace 1": 1, + "exported_namespace 2": 11, + "name 1": 9, + "name 2": 3, + "namespace": 0, + "namespace 1": 2, + "namespaced_name": 7 + }, + "renameByName": { + "Time 1": "", + "Value #B": "Valid Until", + "condition": "Ready Status", + "exported_namespace": "Certificate Namespace", + "exported_namespace 1": "Certificate Namespace", + "exported_namespace 2": "", + "name": "Certificate", + "name 2": "Certificate", + "namespace": "Namespace", + "namespace 1": "Namespace", + "namespaced_name": "" + } + } + } + ], + "type": "table" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "The rate of controller sync requests.", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 7, + "interval": "20s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count[$__rate_interval])\n)", + "interval": "", + "legendFormat": "{{controller}}", + "refId": "A" + } + ], + "thresholds": [ ], + 
"timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Controller Sync Requests/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Rate of requests to ACME provider.", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 6, + "interval": "20s", + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count[$__rate_interval])\n)", + "interval": "", + "legendFormat": "{{method}} {{path}} {{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ACME HTTP Requests/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average duration of requests to ACME provider. ", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 10, + "interval": "30s", + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum[$__rate_interval]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count[$__rate_interval]))", + "interval": "", + "legendFormat": "{{method}} {{path}} {{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ACME HTTP Request avg duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "CPU Usage and limits, as percent of a vCPU core. ", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 12, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "CPU", + "fill": 1, + "fillGradient": 5 + }, + { + "alias": "/Request.*/", + "color": "#FF9830", + "dashes": true + }, + { + "alias": "/Limit.*/", + "color": "#F2495C", + "dashes": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{pod}}", + "refId": "A" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\"})", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit {{pod}}", + "refId": "B" + }, + { + "expr": "avg by (pod) 
(kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\"})", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request {{pod}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Percent of the time that the CPU is being throttled. Higher is badderer. 
", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 24 + }, + "hiddenSeries": false, + "id": 14, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "maxDataPoints": 250, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/external-dns.*/", + "fill": 1, + "fillGradient": 5 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\"}[$__rate_interval])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\"}[$__rate_interval])\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU Throttling", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Memory utilisation and limits.", + 
"fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 16, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Memory", + "fill": 1, + "fillGradient": 5 + }, + { + "alias": "Request", + "color": "#FF9830", + "dashes": true + }, + { + "alias": "Limit", + "color": "#F2495C", + "dashes": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Memory {{pod}}", + "refId": "A" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit {{pod}}", + "refId": "B" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request {{pod}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + 
"label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network ingress/egress.", + "fieldConfig": { + "defaults": { + "custom": { }, + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 24 + }, + "hiddenSeries": false, + "id": 18, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmit", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "receive", + "refId": "A" + }, + { + "expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "transmit", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Network", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", 
+ "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "cert-manager", + "infra" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "prometheus", + "value": "prometheus" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Cert Manager", + "uid": "TvuRo2iMk", + "version": 1 +} diff --git a/infrastructure/cert-manager/kustomization.yaml b/infrastructure/cert-manager/kustomization.yaml index fd51ad3b1..dd8fcaea5 100644 --- a/infrastructure/cert-manager/kustomization.yaml +++ b/infrastructure/cert-manager/kustomization.yaml @@ -6,8 +6,16 @@ resources: - repository.yaml - release.yaml - networkpolicy.yaml + - alerts.yaml - ../../shared/networkpolicies/allow-from-monitoring.yaml - ../../shared/networkpolicies/allow-from-same-namespace.yaml - ../../shared/networkpolicies/allow-from-all-namespaces.yaml patchesStrategicMerge: - networkpolicy-patch.yaml +configMapGenerator: + - name: cert-manager-grafana-dashboards + files: + - ./dashboards/cert-manager.json + options: + labels: + grafana_dashboard: cert-manager -- GitLab