diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 87e216584d0d74045b951500d02bd8d1039ba5ba..d3dac64f6c056735d42412c2dbc9f55348204639 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" } }, - "version": "360f5ad835e12f3627a45f2df0487f73ec8969e3" + "version": "8582fcce481d4302a9c29e9b5e4182c5d98d67ed" }, { "name": "ksonnet", @@ -28,7 +28,7 @@ "subdir": "" } }, - "version": "64a06754786ca1a28929b9fb05c381085dcdd44c" + "version": "4c23c06fff9ef50744f5ed306c9ab0c4bd78a144" }, { "name": "grafonnet", @@ -38,7 +38,7 @@ "subdir": "grafonnet" } }, - "version": "eea8b5ba6b8883cf2df5a17c39a42c4b57c0d63e" + "version": "d05a9bd3adfba39ed3b1987a428d9e9863428df5" }, { "name": "grafana-builder", @@ -48,7 +48,7 @@ "subdir": "grafana-builder" } }, - "version": "d60a39a5c01f651fdfef2db7a710bb5319b0dbc4" + "version": "c6932cf90bce4fef218b4308effc9f15c4219a01" }, { "name": "grafana", @@ -68,7 +68,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "72ec4b9b16ef11700724dc71fec77112536eed40" + "version": "174c9bf17bec78b055e8e63c6ae3a3dc9bb0a3a8" }, { "name": "etcd-mixin", @@ -78,7 +78,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "dedae6eb7c253635e70403a1fb04842700277b23" + "version": "b04633fd8e67c65d4fe4929333fb4856f25da189" } ] } diff --git a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml index d5c94fc9867958cdd41b229fa10cfd2759aa6149..89748f1acddcf97c265ded69bf350f4de7e3f8be 100644 --- a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml +++ b/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml @@ -1378,6 +1378,12 @@ spec: under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Alertmanager is being + configured. + type: string imagePullSecrets: description: An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries diff --git a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml index d825277ebe0cf38eae542c2489f2be959094190f..627ce96d4be312855b1265ef59dc52008562215b 100644 --- a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml +++ b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -1550,6 +1550,12 @@ spec: under. This is necessary to generate correct URLs. This is necessary if Prometheus is not served from root of a DNS name. type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Prometheus is being + configured. + type: string imagePullSecrets: description: An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries @@ -1863,6 +1869,21 @@ spec: priorityClassName: description: Priority class assigned to the Pods type: string + query: + description: QuerySpec defines the query command line flags when starting + Prometheus. + properties: + lookbackDelta: + description: The delta difference allowed for retrieving metrics + during expression evaluations. + type: string + maxConcurrency: + description: Number of concurrent queries that can be run at once. + format: int32 + type: integer + timeout: + description: Maximum time a query may take before being aborted. + type: string remoteRead: description: If specified, the remote_read spec. This is an experimental feature, it may change in any upcoming release in a breaking way. @@ -2943,6 +2964,12 @@ spec: type: boolean required: - key + image: + description: Image if specified has precedence over baseImage, tag + and sha combinations. Specifying the version is still necessary + to ensure the Prometheus Operator knows what version of Thanos + is being configured. + type: string peers: description: Peers is a DNS name for Thanos to discover peers through. type: string diff --git a/manifests/0prometheus-operator-deployment.yaml b/manifests/0prometheus-operator-deployment.yaml index 1ddbae2f94f3f2f188b33be5a3114cab8f3f962d..55ad90434f568cbdb3d888d137adf354f9ba8cb9 100644 --- a/manifests/0prometheus-operator-deployment.yaml +++ b/manifests/0prometheus-operator-deployment.yaml @@ -20,8 +20,8 @@ spec: - --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 - - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.26.0 - image: quay.io/coreos/prometheus-operator:v0.26.0 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.27.0 + image: quay.io/coreos/prometheus-operator:v0.27.0 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index e4364aa65b258b43f90d1c78077580eb0d5e495a..f2c1ba403def7972c917d3b81faec83237aaf6aa 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -796,7 +796,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"} - node_filesystem_avail_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:\n", + "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5064,7 +5064,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "avg by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m])) * 100", + "expr": "sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cpu}}", @@ -5775,7 +5775,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", + "expr": "max(rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -5866,7 +5866,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", + "expr": "max(rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -5921,6 +5921,198 @@ items: "title": "Dashboard Row", "titleSize": "h6", "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes used", + "refId": "A" + }, + { + "expr": "max(node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes free", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Inodes Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 13, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(\n (\n (\n node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Inodes Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" } ], "schemaVersion": 14, @@ -6012,6 +6204,349 @@ items: metadata: name: grafana-dashboard-nodes namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} - kubelet_volume_stats_available_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"}) / kubelet_volume_stats_capacity_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ Usage }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_volume_stats_inodes_used{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} / kubelet_volume_stats_inodes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ Usage }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, exported_namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", exported_namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring - apiVersion: v1 data: pods.json: |- @@ -6078,21 +6613,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=\u007e\"$container\", container_name!=\"POD\"})", + "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Current: {{ container_name }}", "refId": "A" }, { - "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", + "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Requested: {{ container }}", "refId": "B" }, { - "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", + "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Limit: {{ container }}", @@ -6418,7 +6953,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info{namespace=\u007e\"$namespace\"}, pod)", + "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", "sort": 0, @@ -6579,7 +7114,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -6659,7 +7194,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}) / 1024^3", + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -6739,7 +7274,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=\u007e\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index 6816ce2f4126b14b53c89e832a3ccbe8761ccfbb..aa5814eb2db4e6355cd30890a239ad171e550f97 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -60,6 +60,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/nodes name: grafana-dashboard-nodes readOnly: false + - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage + readOnly: false - mountPath: /grafana-dashboard-definitions/0/pods name: grafana-dashboard-pods readOnly: false @@ -97,6 +100,9 @@ spec: - configMap: name: grafana-dashboard-nodes name: grafana-dashboard-nodes + - configMap: + name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage - configMap: name: grafana-dashboard-pods name: grafana-dashboard-pods diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 01ca8ddbf8e9bb44edea994deb591e7419110a64..05e0debf10d0392cfde9e0f9ea3763ccbec94383 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -264,6 +264,28 @@ spec: node_namespace_pod:kube_pod_info: ) record: node:node_net_saturation:sum_irate + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_total:' + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_free:' - name: kube-prometheus-node-recording.rules rules: - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m])) BY (instance) @@ -680,7 +702,7 @@ spec: }}' is experiencing {{ printf "%0.0f" $value }}% errors.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors expr: | - (sum(rate(rest_client_requests_total{code!~"2..|404"}[5m])) by (instance, job) + (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) / sum(rate(rest_client_requests_total[5m])) by (instance, job)) * 100 > 1 diff --git a/manifests/prometheus-serviceMonitorCoreDNS.yaml b/manifests/prometheus-serviceMonitorCoreDNS.yaml index 14a245459fee6cff374468d6766875ad5b7ba12d..633aa18cf1d4d352e4f527b726a1127393da6f4d 100644 --- a/manifests/prometheus-serviceMonitorCoreDNS.yaml +++ b/manifests/prometheus-serviceMonitorCoreDNS.yaml @@ -10,6 +10,7 @@ spec: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token interval: 15s port: metrics + jobLabel: k8s-app namespaceSelector: matchNames: - kube-system