diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 111f4e8757fb3af9532a7bda79038f77ecdf6658..088fd07f2cd403205c8b6b32f31c5ece9eafa8b7 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -18,7 +18,7 @@ "subdir": "contrib/mixin" } }, - "version": "fce823ac2830033270f8fe03fa1b56e62bf882b8", + "version": "5813dce9ad1a9e31d9b51d199d374e68d233e439", "sum": "IXI3LQIT9NmTPJAk8WLUJd5+qZfcGpeNCyWIK7oEpws=" }, { @@ -118,8 +118,8 @@ "subdir": "" } }, - "version": "35aebcabd8b3c166dbcc150ea0cd7387130830da", - "sum": "Yf2mDNR9M1Mg+iEbiO4MQZqVvqWwIJZaDRaiOpByYgw=" + "version": "af5e89820645ab7f99c8be0c247ab2f9f845869d", + "sum": "h+Ay46EcuMsXqx1DyzbVcAOc5C9T7gC9TIn5K5swc2E=" }, { "source": { @@ -128,7 +128,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "d3f0c1853f11b387d7dc2c89bd5e52760f65079c", + "version": "c198a18d289d520f462675e622a06a85404b5a0b", "sum": "lO7jUSzAIy8Yk9pOWJIWgPRhubkWzVh56W6wtYfbVH4=" }, { @@ -138,7 +138,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "d3f0c1853f11b387d7dc2c89bd5e52760f65079c", + "version": "c198a18d289d520f462675e622a06a85404b5a0b", "sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c=" }, { @@ -148,7 +148,7 @@ "subdir": "jsonnet/mixin" } }, - "version": "96ca1f8c8d186a011b457c3f520bc83b28c4cf10", + "version": "ae04859fc90096b7bbbe297d4fdade73f0d1f1a6", "sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=", "name": "prometheus-operator-mixin" }, @@ -159,8 +159,8 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "96ca1f8c8d186a011b457c3f520bc83b28c4cf10", - "sum": "eutJdZtd3a/Mlvnr1ahROiVZGhDdJFBd7mO8bOUZpdo=" + "version": "ae04859fc90096b7bbbe297d4fdade73f0d1f1a6", + "sum": "hR9044bifEF7RJSZLOX9qvWHqAhuhqnqXkXgmARfneI=" }, { "source": { @@ -169,7 +169,7 @@ "subdir": "doc/alertmanager-mixin" } }, - "version": "3b61ae81a7b859ae4e9ddd34c88d71b4c1691a9e", + "version": "486a463e1076bcf610b62542f53d2604a8a6788a", "sum": "Mf4h1BYLle2nrgjf/HXrBbl0Zk8N+xaoEM017o0BC+k=", "name": "alertmanager" }, @@ -180,7 +180,7 @@ "subdir": "docs/node-mixin" } }, - "version": "8f9a914bee023a614f4eea1005baf2e112bbadb0", + "version": "acdd9b813d71b856fe34df97caa1ed90c8587692", "sum": "xYj6VYFT/eafsbleNlC+Z2VfLy1CndyYrJs9BcTmnX8=" }, { @@ -190,8 +190,8 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "a441ad771e881c0035bc90129bcae682ebf66338", - "sum": "OYT5u3S8DbamuJV/v3gbWSteOvFzMeNwMj+u4Apk7jM=", + "version": "1e81fd216631913104b37fc6a43312cb3af0eec4", + "sum": "CwaQpW66lHx+++sY2g4BgrUTEFZtlDnQzFjo0AlgfIg=", "name": "prometheus" }, { @@ -212,7 +212,7 @@ "subdir": "mixin" } }, - "version": "ca2e23ffb5551feb9688c4379a19bcdf4118a275", + "version": "4ba0ba403896e468dfda3da07ce89aa159dfbe37", "sum": "ieCD4eMgGbOlrI8GmckGPHBGQDcLasE1rULYq56W/bs=", "name": "thanos-mixin" }, diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 1fef0dcd76e00b14fc5188127954a48398b2489c..3ff07e280a07e0886f62a43db574acba69b12595 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -18347,1651 +18347,793 @@ items: data: prometheus-remote-write.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "60s", - "rows": [ + "panels": [ { - "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"} != 0)\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Highest Timestamp In vs. Highest Timestamp Sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - ] + ], + "title": "Timestamps", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n-\n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"} != 0)\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Highest Timestamp In vs. Highest Timestamp Sent", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])\n, 0)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate[5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n-\n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])\n, 0)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Timestamps", - "titleSize": "h6", - "type": "row" + "title": "Rate[5m]", + "type": "timeseries" }, { - "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 4, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + ], + "title": "Samples", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate, in vs. succeeded or dropped [5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n-\n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n-\n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples", - "titleSize": "h6", - "type": "row" + "title": "Rate, in vs. succeeded or dropped [5m]", + "type": "timeseries" }, { - "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 6, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 5, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - ], - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + ], + "title": "Shards", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 7, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Max Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Min Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Current Shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 8, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Desired Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shards", - "titleSize": "h6", - "type": "row" + "title": "Max Shards", + "type": "timeseries" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Shard Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 9, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Pending Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shard Details", - "titleSize": "h6", - "type": "row" + "title": "Min Shards", + "type": "timeseries" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "TSDB Current Segment", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 10, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 12, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Remote Write Current Segment", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Segments", - "titleSize": "h6", - "type": "row" + "title": "Desired Shards", + "type": "timeseries" }, { - "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 11, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Dropped Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + ], + "title": "Shard Details", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 12, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Failed Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Shard Capacity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 13, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "id": 15, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Retried Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Pending Samples", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 14, + "panels": [ - ] + ], + "title": "Segments", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 15, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - + "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}}" + } + ], + "title": "TSDB Current Segment", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" }, - "id": 16, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Enqueue Retries", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 16, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{consumer}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, + "title": "Remote Write Current Segment", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 17, + "panels": [ + + ], "title": "Misc. Rates", - "titleSize": "h6", "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "prometheus-mixin" - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 41 + }, + "id": 18, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Dropped Samples", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 41 + }, + "id": 19, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Failed Samples", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 41 + }, + "id": 20, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Retried Samples", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 41 + }, + "id": 21, + "options": { + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}" + } + ], + "title": "Enqueue Retries", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [ + "prometheus-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "name": "datasource", + "query": "prometheus", + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "$__all", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "hide": 0, "includeAll": true, - "label": null, - "multi": false, "name": "cluster", - "options": [ - - ], "query": "label_values(prometheus_build_info, cluster)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$datasource", - "hide": 0, "includeAll": true, - "label": null, - "multi": false, "name": "instance", - "options": [ - - ], "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { - "allValue": null, - "current": { - + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$datasource", - "hide": 0, "includeAll": true, - "label": null, - "multi": false, "name": "url", - "options": [ - - ], "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, "time": { - "from": "now-6h", + "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" + "60s" ] }, - "timezone": "browser", - "title": "Prometheus / Remote Write", - "version": 0 + "timezone": "utc", + "title": "Prometheus / Remote Write" } kind: ConfigMap metadata: @@ -20006,1093 +19148,746 @@ items: data: prometheus.json: |- { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "60s", - "rows": [ + "panels": [ { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" + ], + "title": "Prometheus Stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "displayName": "", + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" }, - { - "alias": "Count", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align", + "value": null + }, + { + "id": "custom.hidden", + "value": "true" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "s" + "properties": [ + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "displayName", + "value": "Cluster" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "properties": [ + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "displayName", + "value": "Job" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "instance" }, - { - "alias": "Instance", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "instance", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "properties": [ + { + "id": "displayName", + "value": "Instance" + }, + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "version" }, - { - "alias": "Job", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "job", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "properties": [ + { + "id": "displayName", + "value": "Version" + }, + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" }, - { - "alias": "Version", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "version", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "properties": [ + { + "id": "displayName", + "value": "Count" + }, + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.hidden", + "value": "true" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + "properties": [ + { + "id": "displayName", + "value": "Uptime" + }, + { + "id": "custom.align", + "value": null + }, + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "count by (cluster, job, instance, version) (prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "legendFormat": "" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "legendFormat": "" + } + ], + "title": "Prometheus Stats", + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "panels": [ - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "count by (cluster, job, instance, version) (prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - }, - { - "expr": "max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "B" + ], + "title": "Discovery", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" + }, + "min": 0, + "unit": "ms" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 4, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[5m])) by (cluster, job, scrape_job, instance) * 1e3", + "format": "time_series", + "legendFormat": "{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}" + } + ], + "title": "Target Sync", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Prometheus Stats", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 5, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{cluster}}:{{job}}:{{instance}}" + } + ], + "title": "Targets", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 6, + "panels": [ - ] + ], + "title": "Retrieval", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never" + }, + "min": 0, + "unit": "ms" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 17 + }, + "id": 7, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", + "format": "time_series", + "legendFormat": "{{cluster}}:{{job}}:{{instance}} {{interval}} configured" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Stats", - "titleSize": "h6" + "title": "Average Scrape Interval Duration", + "type": "timeseries" }, { - "collapse": false, - "height": "250px", - "panels": [ + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "min": 0, + "unit": "ms" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 17 + }, + "id": 8, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[5m])) by (cluster, job, scrape_job, instance) * 1e3", - "format": "time_series", - "legendFormat": "{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}", - "legendLink": null - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Target Sync", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", + "format": "time_series", + "legendFormat": "exceeded body size limit: {{cluster}} {{job}} {{instance}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", + "format": "time_series", + "legendFormat": "exceeded sample limit: {{cluster}} {{job}} {{instance}}" }, { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"})", - "format": "time_series", - "legendFormat": "{{cluster}}:{{job}}:{{instance}}", - "legendLink": null - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Targets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", + "format": "time_series", + "legendFormat": "duplicate timestamp: {{cluster}} {{job}} {{instance}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", + "format": "time_series", + "legendFormat": "out of bounds: {{cluster}} {{job}} {{instance}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", + "format": "time_series", + "legendFormat": "out of order: {{cluster}} {{job}} {{instance}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Discovery", - "titleSize": "h6" + "title": "Scrape failures", + "type": "timeseries" }, { - "collapse": false, - "height": "250px", - "panels": [ + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 17 + }, + "id": 9, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + "expr": "rate(prometheus_tsdb_head_samples_appended_total{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m])", + "format": "time_series", + "legendFormat": "{{cluster}} {{job}} {{instance}}" + } + ], + "title": "Appended Samples", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 10, + "panels": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", - "format": "time_series", - "legendFormat": "{{cluster}}:{{job}}:{{instance}} {{interval}} configured", - "legendLink": null + ], + "title": "Storage", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Average Scrape Interval Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 11, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", - "format": "time_series", - "legendFormat": "exceeded body size limit: {{cluster}} {{job}} {{instance}}", - "legendLink": null - }, - { - "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", - "format": "time_series", - "legendFormat": "exceeded sample limit: {{cluster}} {{job}} {{instance}}", - "legendLink": null - }, - { - "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", - "format": "time_series", - "legendFormat": "duplicate timestamp: {{cluster}} {{job}} {{instance}}", - "legendLink": null - }, - { - "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", - "format": "time_series", - "legendFormat": "out of bounds: {{cluster}} {{job}} {{instance}}", - "legendLink": null - }, - { - "expr": "sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))", - "format": "time_series", - "legendFormat": "out of order: {{cluster}} {{job}} {{instance}}", - "legendLink": null + "expr": "prometheus_tsdb_head_series{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "legendFormat": "{{cluster}} {{job}} {{instance}} head series" + } + ], + "title": "Head Series", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Scrape failures", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 12, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_tsdb_head_samples_appended_total{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m])", - "format": "time_series", - "legendFormat": "{{cluster}} {{job}} {{instance}}", - "legendLink": null - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Appended Samples", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "prometheus_tsdb_head_chunks{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "legendFormat": "{{cluster}} {{job}} {{instance}} head chunks" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Retrieval", - "titleSize": "h6" + "title": "Head Chunks", + "type": "timeseries" }, { - "collapse": false, - "height": "250px", + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 13, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_head_series{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}", - "format": "time_series", - "legendFormat": "{{cluster}} {{job}} {{instance}} head series", - "legendLink": null + ], + "title": "Query", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Head Series", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 14, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_head_chunks{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}", - "format": "time_series", - "legendFormat": "{{cluster}} {{job}} {{instance}} head chunks", - "legendLink": null - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Head Chunks", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "rate(prometheus_engine_query_duration_seconds_count{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", + "format": "time_series", + "legendFormat": "{{cluster}} {{job}} {{instance}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Storage", - "titleSize": "h6" + "title": "Query Rate", + "type": "timeseries" }, { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_engine_query_duration_seconds_count{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", - "format": "time_series", - "legendFormat": "{{cluster}} {{job}} {{instance}}", - "legendLink": null + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "lineWidth": 0, + "showPoints": "never", + "stacking": { + "mode": "normal" } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Query Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, + "min": 0, + "unit": "ms" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 15, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}) * 1e3", - "format": "time_series", - "legendFormat": "{{slice}}", - "legendLink": null - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Stage Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}) * 1e3", + "format": "time_series", + "legendFormat": "{{slice}}" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query", - "titleSize": "h6" + "title": "Stage Duration", + "type": "timeseries" } ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "prometheus-mixin" ], @@ -21100,103 +19895,70 @@ items: "list": [ { "current": { + "selected": false, "text": "default", "value": "default" }, "hide": 0, "label": "Data source", "name": "datasource", - "options": [ - - ], "query": "prometheus", - "refresh": 1, - "regex": "", "type": "datasource" }, { "allValue": ".+", "current": { - "selected": true, - "text": "All", - "value": "$__all" + "selected": false, + "text": [ + "$__all" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "cluster", "multi": true, "name": "cluster", - "options": [ - - ], "query": "label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, cluster)", - "refresh": 1, - "regex": "", + "refresh": 2, "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$datasource", - "hide": 0, "includeAll": true, "label": "job", "multi": true, "name": "job", - "options": [ - - ], "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, job)", - "refresh": 1, - "regex": "", + "refresh": 2, "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$datasource", - "hide": 0, "includeAll": true, "label": "instance", "multi": true, "name": "instance", - "options": [ - - ], "query": "label_values(prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\"}, instance)", - "refresh": 1, - "regex": "", + "refresh": 2, "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -21206,33 +19968,11 @@ items: }, "timepicker": { "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" + "60s" ] }, "timezone": "utc", - "title": "Prometheus / Overview", - "uid": "", - "version": 0 + "title": "Prometheus / Overview" } kind: ConfigMap metadata: diff --git a/manifests/kubernetesControlPlane-prometheusRule.yaml b/manifests/kubernetesControlPlane-prometheusRule.yaml index 3b155f8f07f885794bf1c02a6178337e32119dea..f22bd94f41be891585ee67dfd4498e9ca8799e58 100644 --- a/manifests/kubernetesControlPlane-prometheusRule.yaml +++ b/manifests/kubernetesControlPlane-prometheusRule.yaml @@ -14,7 +14,7 @@ spec: rules: - alert: KubePodCrashLooping annotations: - description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").' + description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping summary: Pod is crash looping. expr: | @@ -24,7 +24,7 @@ spec: severity: warning - alert: KubePodNotReady annotations: - description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. + description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready summary: Pod has been in a non-ready state for more than 15 minutes. expr: | @@ -40,7 +40,7 @@ spec: severity: warning - alert: KubeDeploymentGenerationMismatch annotations: - description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. + description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch summary: Deployment generation mismatch due to possible roll-back expr: | @@ -52,7 +52,7 @@ spec: severity: warning - alert: KubeDeploymentReplicasMismatch annotations: - description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. + description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch summary: Deployment has not matched the expected number of replicas. expr: | @@ -70,7 +70,7 @@ spec: severity: warning - alert: KubeDeploymentRolloutStuck annotations: - description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes. + description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck summary: Deployment rollout is not progressing. expr: | @@ -81,7 +81,7 @@ spec: severity: warning - alert: KubeStatefulSetReplicasMismatch annotations: - description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch summary: StatefulSet has not matched the expected number of replicas. expr: | @@ -99,7 +99,7 @@ spec: severity: warning - alert: KubeStatefulSetGenerationMismatch annotations: - description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. + description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch summary: StatefulSet generation mismatch due to possible roll-back expr: | @@ -111,7 +111,7 @@ spec: severity: warning - alert: KubeStatefulSetUpdateNotRolledOut annotations: - description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout summary: StatefulSet update has not been rolled out. expr: | @@ -137,7 +137,7 @@ spec: severity: warning - alert: KubeDaemonSetRolloutStuck annotations: - description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m. + description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck summary: DaemonSet rollout is stuck. expr: | @@ -169,7 +169,7 @@ spec: severity: warning - alert: KubeContainerWaiting annotations: - description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: "{{ $labels.reason }}").' + description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: "{{ $labels.reason }}") on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting summary: Pod container waiting longer than 1 hour expr: | @@ -179,7 +179,7 @@ spec: severity: warning - alert: KubeDaemonSetNotScheduled annotations: - description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled summary: DaemonSet pods are not scheduled. expr: | @@ -191,7 +191,7 @@ spec: severity: warning - alert: KubeDaemonSetMisScheduled annotations: - description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled summary: DaemonSet pods are misscheduled. expr: | @@ -201,7 +201,7 @@ spec: severity: warning - alert: KubeJobNotCompleted annotations: - description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete. + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted summary: Job did not complete in time expr: | @@ -212,7 +212,7 @@ spec: severity: warning - alert: KubeJobFailed annotations: - description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert. + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed summary: Job failed to complete. expr: | @@ -222,7 +222,7 @@ spec: severity: warning - alert: KubeHpaReplicasMismatch annotations: - description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes. + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch summary: HPA has not matched desired number of replicas. expr: | @@ -244,7 +244,7 @@ spec: severity: warning - alert: KubeHpaMaxedOut annotations: - description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes. + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout summary: HPA is running at max replicas expr: | @@ -308,7 +308,7 @@ spec: severity: warning - alert: KubeQuotaAlmostFull annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull summary: Namespace quota is going to be full. expr: | @@ -321,7 +321,7 @@ spec: severity: info - alert: KubeQuotaFullyUsed annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused summary: Namespace quota is fully used. expr: | @@ -334,7 +334,7 @@ spec: severity: info - alert: KubeQuotaExceeded annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded summary: Namespace quota has exceeded the limits. expr: | @@ -347,7 +347,7 @@ spec: severity: warning - alert: CPUThrottlingHigh annotations: - description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' + description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }} on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh summary: Processes experience elevated CPU throttling. expr: | @@ -458,7 +458,7 @@ spec: rules: - alert: KubeVersionMismatch annotations: - description: There are {{ $value }} different semantic versions of Kubernetes components running. + description: There are {{ $value }} different semantic versions of Kubernetes components running on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch summary: Different semantic versions of Kubernetes components running. expr: | @@ -468,7 +468,7 @@ spec: severity: warning - alert: KubeClientErrors annotations: - description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.' + description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors summary: Kubernetes API server client is experiencing errors. expr: | @@ -483,7 +483,7 @@ spec: rules: - alert: KubeAPIErrorBudgetBurn annotations: - description: The API server is burning too much error budget. + description: The API server is burning too much error budget on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: | @@ -497,7 +497,7 @@ spec: short: 5m - alert: KubeAPIErrorBudgetBurn annotations: - description: The API server is burning too much error budget. + description: The API server is burning too much error budget on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: | @@ -511,7 +511,7 @@ spec: short: 30m - alert: KubeAPIErrorBudgetBurn annotations: - description: The API server is burning too much error budget. + description: The API server is burning too much error budget on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: | @@ -525,7 +525,7 @@ spec: short: 2h - alert: KubeAPIErrorBudgetBurn annotations: - description: The API server is burning too much error budget. + description: The API server is burning too much error budget on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: | @@ -575,7 +575,7 @@ spec: severity: warning - alert: KubeAggregatedAPIDown annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m. + description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown summary: Kubernetes aggregated API is down. expr: | @@ -595,7 +595,7 @@ spec: severity: critical - alert: KubeAPITerminatedRequests annotations: - description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. + description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. expr: | @@ -607,7 +607,7 @@ spec: rules: - alert: KubeNodeNotReady annotations: - description: '{{ $labels.node }} has been unready for more than 15 minutes.' + description: '{{ $labels.node }} has been unready for more than 15 minutes on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready summary: Node is not ready. expr: | @@ -617,7 +617,7 @@ spec: severity: warning - alert: KubeNodeUnreachable annotations: - description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' + description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled on cluster {{ $labels.cluster }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable summary: Node is unreachable. expr: | @@ -627,23 +627,27 @@ spec: severity: warning - alert: KubeletTooManyPods annotations: - description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. + description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods summary: Kubelet is running at capacity. expr: | - count by(cluster, node) ( - (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"}) + count by (cluster, node) ( + (kube_pod_status_phase{job="kube-state-metrics", phase="Running"} == 1) + * on (cluster, namespace, pod) group_left (node) + group by (cluster, namespace, pod, node) ( + kube_pod_info{job="kube-state-metrics"} + ) ) / - max by(cluster, node) ( - kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 + max by (cluster, node) ( + kube_node_status_capacity{job="kube-state-metrics", resource="pods"} != 1 ) > 0.95 for: 15m labels: severity: info - alert: KubeNodeReadinessFlapping annotations: - description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. + description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping summary: Node readiness status is flapping. expr: | @@ -653,7 +657,7 @@ spec: severity: warning - alert: KubeletPlegDurationHigh annotations: - description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. + description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. expr: | @@ -663,7 +667,7 @@ spec: severity: warning - alert: KubeletPodStartUpLatencyHigh annotations: - description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. + description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh summary: Kubelet Pod startup latency is too high. expr: | @@ -673,7 +677,7 @@ spec: severity: warning - alert: KubeletClientCertificateExpiration annotations: - description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration summary: Kubelet client certificate is about to expire. expr: | @@ -682,7 +686,7 @@ spec: severity: warning - alert: KubeletClientCertificateExpiration annotations: - description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration summary: Kubelet client certificate is about to expire. expr: | @@ -691,7 +695,7 @@ spec: severity: critical - alert: KubeletServerCertificateExpiration annotations: - description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration summary: Kubelet server certificate is about to expire. expr: | @@ -700,7 +704,7 @@ spec: severity: warning - alert: KubeletServerCertificateExpiration annotations: - description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration summary: Kubelet server certificate is about to expire. expr: | @@ -709,7 +713,7 @@ spec: severity: critical - alert: KubeletClientCertificateRenewalErrors annotations: - description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes). + description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors summary: Kubelet has failed to renew its client certificate. expr: | @@ -719,7 +723,7 @@ spec: severity: warning - alert: KubeletServerCertificateRenewalErrors annotations: - description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes). + description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors summary: Kubelet has failed to renew its server certificate. expr: | diff --git a/manifests/setup/0prometheusCustomResourceDefinition.yaml b/manifests/setup/0prometheusCustomResourceDefinition.yaml index 41c7c5647d9ffb698319e7fb7b1eed7954290aab..484c08a9454a54959ff266cb04a98e6fbc8653ac 100644 --- a/manifests/setup/0prometheusCustomResourceDefinition.yaml +++ b/manifests/setup/0prometheusCustomResourceDefinition.yaml @@ -5290,6 +5290,13 @@ spec: Settings related to the OTLP receiver feature. It requires Prometheus >= v2.55.0. properties: + keepIdentifyingResourceAttributes: + description: |- + Enables adding `service.name`, `service.namespace` and `service.instance.id` + resource attributes to the `target_info` metric, on top of converting them into the `instance` and `job` labels. + + It requires Prometheus >= v3.1.0. + type: boolean promoteResourceAttributes: description: List of OpenTelemetry Attributes that should be promoted to metric labels, defaults to none. items: @@ -5301,7 +5308,6 @@ spec: translationStrategy: description: |- Configures how the OTLP receiver endpoint translates the incoming metrics. - If unset, Prometheus uses its default value. It requires Prometheus >= v3.0.0. enum: diff --git a/manifests/setup/0prometheusagentCustomResourceDefinition.yaml b/manifests/setup/0prometheusagentCustomResourceDefinition.yaml index ccdb0305d510de98a6178ec5f54222c43dc67da4..93a0f47d0eb48bc9b90fd130e035b1463a54f667 100644 --- a/manifests/setup/0prometheusagentCustomResourceDefinition.yaml +++ b/manifests/setup/0prometheusagentCustomResourceDefinition.yaml @@ -4564,6 +4564,13 @@ spec: Settings related to the OTLP receiver feature. It requires Prometheus >= v2.55.0. properties: + keepIdentifyingResourceAttributes: + description: |- + Enables adding `service.name`, `service.namespace` and `service.instance.id` + resource attributes to the `target_info` metric, on top of converting them into the `instance` and `job` labels. + + It requires Prometheus >= v3.1.0. + type: boolean promoteResourceAttributes: description: List of OpenTelemetry Attributes that should be promoted to metric labels, defaults to none. items: @@ -4575,7 +4582,6 @@ spec: translationStrategy: description: |- Configures how the OTLP receiver endpoint translates the incoming metrics. - If unset, Prometheus uses its default value. It requires Prometheus >= v3.0.0. enum: