Skip to content
Snippets Groups Projects
Unverified Commit 12cd0fd3 authored by Frederic Branczyk's avatar Frederic Branczyk
Browse files

add ConfigMap generate script

This also contains the generated ConfigMaps for the grafana dashboards
as well as for alerts. These will be updated in subsequent changes as
sets of alerts and dashboards are added/updated in the respective
subdirectories in `/assets`.
parent 74a80cd1
No related branches found
No related tags found
No related merge requests found
# general cluster availability
# alert if another failed peer will result in an unavailable cluster
ALERT InsufficientPeers
IF count(up{job="etcd"} == 0) > (count(up{job="etcd"}) / 2 - 1)
FOR 3m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "Etcd cluster small",
description = "If one more etcd peer goes down the cluster will be unavailable",
}
# etcd leader alerts
# ==================
# alert if any etcd instance has no leader
ALERT EtcdNoLeader
IF etcd_server_has_leader{job="etcd"} == 0
FOR 1m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "etcd node has no leader",
description = "etcd node {{ $labels.instance }} has no leader",
}
# alert if there are lots of leader changes
ALERT HighNumberOfLeaderChanges
IF increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of leader changes within the etcd cluster are happening",
description = "etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour",
}
# gRPC request alerts
# ===================
# alert if more than 1% of gRPC method calls have failed within the last 5 minutes
ALERT HighNumberOfFailedGRPCRequests
IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m]))
/ sum by(grpc_method) (rate(etcd_grpc_total{job="etcd"}[5m])) > 0.01
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of gRPC requests are failing",
description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if more than 5% of gRPC method calls have failed within the last 5 minutes
ALERT HighNumberOfFailedGRPCRequests
IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m]))
/ sum by(grpc_method) (rate(etcd_grpc_total{job="etcd"}[5m])) > 0.05
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "a high number of gRPC requests are failing",
description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if the 99th percentile of gRPC method calls take more than 150ms
ALERT GRPCRequestsSlow
IF histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "slow gRPC requests",
description = "on ectd instance {{ $labels.instance }} gRPC requests to {{ $label.grpc_method }} are slow",
}
# HTTP requests alerts
# ====================
# alert if more than 1% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m]))
/ sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.01
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if more than 5% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m]))
/ sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.05
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if the 99th percentile of HTTP requests take more than 150ms
ALERT HTTPRequestsSlow
IF histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "slow HTTP requests",
description = "on ectd instance {{ $labels.instance }} HTTP requests to {{ $label.method }} are slow",
}
# file descriptor alerts
# ======================
instance:fd_utilization = process_open_fds / process_max_fds
# alert if file descriptors are likely to exhaust within the next 4 hours
ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust in file descriptors soon",
}
# alert if file descriptors are likely to exhaust within the next hour
ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[10m], 3600) > 1
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust in file descriptors soon",
}
# etcd peer communication alerts
# ==============================
# alert if 99th percentile of round trips take 150ms
ALERT EtcdPeerCommunicationSlow
IF histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "etcd peer communication is slow",
description = "ectd instance {{ $labels.instance }} peer communication with {{ $label.To }} is slow",
}
# etcd proposal alerts
# ====================
# alert if there are several failed proposals within an hour
ALERT HighNumberOfFailedProposals
IF increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of failed proposals within the etcd cluster are happening",
description = "etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour",
}
# etcd disk io latency alerts
# ===========================
# alert if 99th percentile of fsync durations is higher than 500ms
ALERT HighFsyncDurations
IF histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) > 0.5
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "high fsync durations",
description = "ectd instance {{ $labels.instance }} fync durations are high",
}
# alert if 99th percentile of commit durations is higher than 250ms
ALERT HighCommitDurations
IF histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) > 0.25
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "high commit durations",
description = "ectd instance {{ $labels.instance }} commit durations are high",
}
\ No newline at end of file
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": ""
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"description": "",
"editable": true,
"gnetId": 162,
"hideControls": true,
"id": null,
"links": [
{
"icon": "external link",
"includeVars": true,
"tags": [],
"targetBlank": false,
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 6,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m] ) )) / count(node_cpu{mode=\"system\",instance=~\"$node:9100\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster CPU Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 3,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m])))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "CPU Usage",
"metric": "container_cpu_usage_seconds_total",
"refId": "A",
"step": 10
},
{
"expr": "count(node_cpu{mode=\"system\",instance=~\"$node:9100\"})",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "CPU Total",
"metric": "node_cpu",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster CPU Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}) ) / sum(node_memory_MemTotal{instance=~\"$node:9100\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster memory usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Cluster Memory Usage",
"metric": "node_memory_MemAvailable",
"refId": "A",
"step": 10
},
{
"expr": "sum(node_memory_MemTotal{instance=~\"$node:9100\"})",
"interval": "10s",
"intervalFactor": 2,
"legendFormat": "Cluster Memory Total",
"metric": "node_memory_MemTotal",
"refId": "B",
"step": 20
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster Memory Usage",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 9,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": true,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum (rate (node_network_receive_bytes{instance=~\"$node:9100\"}[1m]))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Received Bytes",
"metric": "network",
"refId": "A",
"step": 10
},
{
"expr": "sum (rate (node_network_transmit_bytes{instance=~\"$node:9100\"}[1m]))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Transmitted Bytes",
"metric": "network",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster Network I/O",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Node",
"multi": false,
"name": "node",
"options": [],
"query": "label_values(kube_node_info, node)",
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Kubernetes Cluster Monitoring",
"version": 0
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": ""
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"hideControls": true,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 2,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Metadata Generation",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 3,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Observed Generation",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_spec_paused{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Paused",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"refId": "A",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 30
},
{
"expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"refId": "E",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Deployment Replicas",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "deployment_namespace",
"options": [],
"query": "label_values(kube_deployment_metadata_generation, namespace)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": null,
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Deployment",
"multi": false,
"name": "deployment_name",
"options": [],
"query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "deployment",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Deployment",
"version": 3
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"hideControls": false,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Current: {{ container_name }}",
"metric": "container_memory_usage_bytes",
"refId": "A",
"step": 10
},
{
"expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}",
"interval": "10s",
"intervalFactor": 2,
"legendFormat": "Requested: {{ container }}",
"metric": "kube_pod_container_requested_memory_bytes",
"refId": "B",
"step": 20
}
],
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )",
"intervalFactor": 2,
"legendFormat": "{{ container_name }}",
"refId": "A",
"step": 30
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))",
"intervalFactor": 2,
"legendFormat": "{{ pod_name }}",
"refId": "A",
"step": 30
}
],
"timeFrom": null,
"timeShift": null,
"title": "Network I/O",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(kube_pod_info, namespace)",
"refresh": 1,
"regex": "",
"type": "query"
},
{
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Pod",
"multi": false,
"name": "pod",
"options": [],
"query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)",
"refresh": 1,
"regex": "",
"type": "query"
},
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Container",
"multi": false,
"name": "container",
"options": [],
"query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)",
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Pods",
"version": 26
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
{
"access": "proxy",
"basicAuth": false,
"name": "prometheus",
"type": "prometheus",
"url": "http://prometheus-k8s.monitoring.svc:9090"
}
#!/bin/bash
# Generate Alert Rules ConfigMap
kubectl create configmap --dry-run=true prometheus-k8s-alerts --from-file=assets/alerts/ -oyaml > manifests/prometheus/prometheus-k8s-alerts.yaml
# Generate Dashboard ConfigMap
kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-cm.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboards
data:
grafana-kubernetes-cluster-dashboard.json: |
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": ""
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"description": "",
"editable": true,
"gnetId": 162,
"hideControls": true,
"id": null,
"links": [
{
"icon": "external link",
"includeVars": true,
"tags": [],
"targetBlank": false,
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 6,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m] ) )) / count(node_cpu{mode=\"system\",instance=~\"$node:9100\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster CPU Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 3,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m])))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "CPU Usage",
"metric": "container_cpu_usage_seconds_total",
"refId": "A",
"step": 10
},
{
"expr": "count(node_cpu{mode=\"system\",instance=~\"$node:9100\"})",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "CPU Total",
"metric": "node_cpu",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster CPU Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}) ) / sum(node_memory_MemTotal{instance=~\"$node:9100\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster memory usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Cluster Memory Usage",
"metric": "node_memory_MemAvailable",
"refId": "A",
"step": 10
},
{
"expr": "sum(node_memory_MemTotal{instance=~\"$node:9100\"})",
"interval": "10s",
"intervalFactor": 2,
"legendFormat": "Cluster Memory Total",
"metric": "node_memory_MemTotal",
"refId": "B",
"step": 20
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster Memory Usage",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 9,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": true,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum (rate (node_network_receive_bytes{instance=~\"$node:9100\"}[1m]))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Received Bytes",
"metric": "network",
"refId": "A",
"step": 10
},
{
"expr": "sum (rate (node_network_transmit_bytes{instance=~\"$node:9100\"}[1m]))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Transmitted Bytes",
"metric": "network",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Cluster Network I/O",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Node",
"multi": false,
"name": "node",
"options": [],
"query": "label_values(kube_node_info, node)",
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Kubernetes Cluster Monitoring",
"version": 0
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
grafana-kubernetes-deployments-dashboard.json: |
{"dashboard":{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "panel", "id": "singlestat", "name": "Singlestat", "version": "" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "3.1.1" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" } ], "id": null, "title": "Deployment", "tags": [], "style": "dark", "timezone": "browser", "editable": true, "hideControls": true, "sharedCrosshair": true, "rows": [ { "collapse": false, "editable": true, "height": "250px", "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "id": 2, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "", "title": "Metadata Generation", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "id": 3, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "", "title": "Observed Generation", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "id": 4, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "kube_deployment_spec_paused{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "", "title": "Paused", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" } ], "title": "Row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "fill": 1, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 1, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "refId": "A", "step": 30 }, { "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 30 }, { "expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 30 }, { "expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 30 }, { "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "refId": "E", "step": 30 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Deployment Replicas", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" } ], "time": { "from": "now-6h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "templating": { "list": [ { "allValue": ".*", "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Namespace", "multi": false, "name": "deployment_namespace", "options": [], "query": "label_values(kube_deployment_metadata_generation, namespace)", "refresh": 1, "regex": "", "sort": 0, "tagValuesQuery": null, "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Deployment", "multi": false, "name": "deployment_name", "options": [], "query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)", "refresh": 1, "regex": "", "sort": 0, "tagValuesQuery": "", "tagsQuery": "deployment", "type": "query", "useTags": false } ] }, "annotations": { "list": [] }, "schemaVersion": 12, "version": 3, "links": [], "gnetId": null },"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": ""
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"hideControls": true,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 2,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Metadata Generation",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 3,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Observed Generation",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "kube_deployment_spec_paused{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 600
}
],
"thresholds": "",
"title": "Paused",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"refId": "A",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 30
},
{
"expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 30
},
{
"expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}",
"intervalFactor": 2,
"refId": "E",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Deployment Replicas",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "deployment_namespace",
"options": [],
"query": "label_values(kube_deployment_metadata_generation, namespace)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": null,
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Deployment",
"multi": false,
"name": "deployment_name",
"options": [],
"query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "deployment",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Deployment",
"version": 3
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
grafana-kubernetes-pods-dashboard.json: |
{"dashboard":{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "panel", "id": "graph", "name": "Graph", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "3.1.1" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" } ], "id": null, "title": "Pods", "tags": [], "style": "dark", "timezone": "browser", "editable": true, "hideControls": false, "sharedCrosshair": true, "rows": [ { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "fill": 1, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 1, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", "interval": "10s", "intervalFactor": 1, "legendFormat": "Current: {{ container_name }}", "metric": "container_memory_usage_bytes", "refId": "A", "step": 10 }, { "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", "interval": "10s", "intervalFactor": 2, "legendFormat": "Requested: {{ container }}", "metric": "kube_pod_container_requested_memory_bytes", "refId": "B", "step": 20 } ], "timeFrom": null, "timeShift": null, "title": "Memory Usage", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "Row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "fill": 1, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 2, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", "intervalFactor": 2, "legendFormat": "{{ container_name }}", "refId": "A", "step": 30 } ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "fill": 1, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 3, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", "intervalFactor": 2, "legendFormat": "{{ pod_name }}", "refId": "A", "step": 30 } ], "timeFrom": null, "timeShift": null, "title": "Network I/O", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" } ], "time": { "from": "now-6h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "templating": { "list": [ { "allValue": ".*", "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Namespace", "multi": false, "name": "namespace", "options": [], "query": "label_values(kube_pod_info, namespace)", "refresh": 1, "regex": "", "type": "query" }, { "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Pod", "multi": false, "name": "pod", "options": [], "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", "refresh": 1, "regex": "", "type": "query" }, { "allValue": ".*", "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Container", "multi": false, "name": "container", "options": [], "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", "refresh": 1, "regex": "", "type": "query" } ] }, "annotations": { "list": [] }, "schemaVersion": 12, "version": 26, "links": [], "gnetId": null },"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}
grafana-kubernetes-cluster-dashboard.json: |
{"dashboard":{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "panel", "id": "singlestat", "name": "Singlestat", "version": "" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "3.1.1" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" } ], "id": null, "title": "Kubernetes Cluster Monitoring", "description": "", "tags": [], "style": "dark", "timezone": "browser", "editable": true, "hideControls": true, "sharedCrosshair": true, "rows": [ { "collapse": false, "editable": true, "height": "250px", "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "decimals": 2, "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "id": 6, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m] ) )) / count(node_cpu{mode=\"system\",instance=~\"$node:9100\"}) * 100", "interval": "10s", "intervalFactor": 1, "legendFormat": "", "refId": "A", "step": 10 } ], "thresholds": "65, 90", "title": "Cluster CPU Usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "decimals": 3, "editable": true, "error": false, "fill": 0, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 3, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": false, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 9, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"$node\"}[1m])))", "interval": "10s", "intervalFactor": 1, "legendFormat": "CPU Usage", "metric": "container_cpu_usage_seconds_total", "refId": "A", "step": 10 }, { "expr": "count(node_cpu{mode=\"system\",instance=~\"$node:9100\"})", "interval": "10s", "intervalFactor": 1, "legendFormat": "CPU Total", "metric": "node_cpu", "refId": "B", "step": 10 } ], "timeFrom": null, "timeShift": null, "title": "Cluster CPU Usage", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "Row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "id": 4, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}) ) / sum(node_memory_MemTotal{instance=~\"$node:9100\"}) * 100", "interval": "10s", "intervalFactor": 1, "legendFormat": "", "refId": "A", "step": 10 } ], "thresholds": "65, 90", "title": "Cluster memory usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 2, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": false, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 9, "stack": false, "steppedLine": false, "targets": [ { "expr": "(sum(node_memory_MemTotal{instance=~\"$node:9100\"}) - sum(node_memory_MemFree{instance=~\"$node:9100\"}+node_memory_Buffers{instance=~\"$node:9100\"}+node_memory_Cached{instance=~\"$node:9100\"}))", "interval": "10s", "intervalFactor": 1, "legendFormat": "Cluster Memory Usage", "metric": "node_memory_MemAvailable", "refId": "A", "step": 10 }, { "expr": "sum(node_memory_MemTotal{instance=~\"$node:9100\"})", "interval": "10s", "intervalFactor": 2, "legendFormat": "Cluster Memory Total", "metric": "node_memory_MemTotal", "refId": "B", "step": 20 } ], "timeFrom": null, "timeShift": null, "title": "Cluster Memory Usage", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "${DS_PROMETHEUS}", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": { "threshold1": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", "threshold2": null, "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "id": 9, "isNew": true, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": true, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (node_network_receive_bytes{instance=~\"$node:9100\"}[1m]))", "interval": "10s", "intervalFactor": 1, "legendFormat": "Received Bytes", "metric": "network", "refId": "A", "step": 10 }, { "expr": "sum (rate (node_network_transmit_bytes{instance=~\"$node:9100\"}[1m]))", "interval": "10s", "intervalFactor": 1, "legendFormat": "Transmitted Bytes", "metric": "network", "refId": "B", "step": 10 } ], "timeFrom": null, "timeShift": null, "title": "Cluster Network I/O", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" } ], "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "templating": { "list": [ { "allValue": ".*", "current": {}, "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Node", "multi": false, "name": "node", "options": [], "query": "label_values(kube_node_info, node)", "refresh": 1, "regex": "", "type": "query" } ] }, "annotations": { "list": [] }, "refresh": "10s", "schemaVersion": 12, "version": 0, "links": [ { "icon": "external link", "includeVars": true, "tags": [], "targetBlank": false, "type": "dashboards" } ], "gnetId": 162 },"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}
{
"dashboard": {
"__inputs": [
{
"description": "",
"label": "prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
}
],
"__requires": [
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": ""
},
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "3.1.1"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"hideControls": false,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "Current: {{ container_name }}",
"metric": "container_memory_usage_bytes",
"refId": "A",
"step": 10
},
{
"expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}",
"interval": "10s",
"intervalFactor": 2,
"legendFormat": "Requested: {{ container }}",
"metric": "kube_pod_container_requested_memory_bytes",
"refId": "B",
"step": 20
}
],
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )",
"intervalFactor": 2,
"legendFormat": "{{ container_name }}",
"refId": "A",
"step": 30
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))",
"intervalFactor": 2,
"legendFormat": "{{ pod_name }}",
"refId": "A",
"step": 30
}
],
"timeFrom": null,
"timeShift": null,
"title": "Network I/O",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"schemaVersion": 12,
"sharedCrosshair": true,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(kube_pod_info, namespace)",
"refresh": 1,
"regex": "",
"type": "query"
},
{
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": false,
"label": "Pod",
"multi": false,
"name": "pod",
"options": [],
"query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)",
"refresh": 1,
"regex": "",
"type": "query"
},
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"hide": 0,
"includeAll": true,
"label": "Container",
"multi": false,
"name": "container",
"options": [],
"query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)",
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Pods",
"version": 26
},
"inputs": [
{
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"type": "datasource",
"value": "prometheus"
}
],
"overwrite": true
}
prometheus-datasource.json: |
{"name":"prometheus","type":"prometheus","url":"http://prometheus-k8s.monitoring.svc:9090","access":"proxy","basicAuth":false}
{
"access": "proxy",
"basicAuth": false,
"name": "prometheus",
"type": "prometheus",
"url": "http://prometheus-k8s.monitoring.svc:9090"
}
kind: ConfigMap
metadata:
creationTimestamp: null
name: grafana-dashboards
apiVersion: v1
data:
.gitkeep: ""
kind: ConfigMap
metadata:
creationTimestamp: null
name: prometheus-k8s-alerts
......@@ -7,6 +7,9 @@ data:
global:
evaluation_interval: 30s
rule_files:
- /etc/prometheus/rules/*.rules
scrape_configs:
- job_name: kubelets
scrape_interval: 20s
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment