diff --git a/README.md b/README.md index 84bf6c2882c90ea81639aaf8fcb74bf3493c44a5..cc24beaf80089074f676650b5063242494acfa0f 100644 --- a/README.md +++ b/README.md @@ -223,7 +223,7 @@ These are the available fields with their respective default values: kubeRbacProxy: "v0.3.1", addonResizer: "1.0", prometheusOperator: "v0.23.2", - prometheus: "v2.3.2", + prometheus: "v2.4.3", }, imageRepos+:: { diff --git a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet index 809cb1e962f09b374fc82ba07ce79404e6bef1bd..d6cbc5fec0baa0d906c1b19213e31d053735343a 100644 --- a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet +++ b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet @@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; namespace: 'default', versions+:: { - prometheus: 'v2.4.2', + prometheus: 'v2.4.3', }, imageRepos+:: { diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 4a773925f0a32eec13914c7ecdcda81b3e49e8a3..1fe6b56bb5d8e82361e188df9a4f13e175b86d92 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" } }, - "version": "004e648d186bc7be6f1f519da26f96bc2533f1b6" + "version": "e53530d13d400496721104c2d30f52fe2b6ff427" }, { "name": "ksonnet", @@ -28,7 +28,7 @@ "subdir": "" } }, - "version": "19da1eb2f2558dad0f8d9e280cc1fe7bc835677b" + "version": "d24c4066aa2653370e1403812202eb38b2e70210" }, { "name": "grafonnet", @@ -58,7 +58,7 @@ "subdir": "grafana" } }, - "version": "9a20f81c9007e4c7409dd0b3edda1a7a78ad2c63" + "version": "850525cfa7a82115cf7a8a85f5ca632f4632be3d" }, { "name": "prometheus-operator", @@ -78,7 +78,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "c74998267c71ef4a0fa847ce16d620b7fe3580bf" + "version": "ba606bf85edfb3007f27a97d41f54e3fe3f70ce6" } ] } diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index cb8cc9d8453619bd5bb8f43196dba6ff9fd9640e..e378f6898b39d82e8c726539882149dcc93837a5 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -16,7 +16,7 @@ spec: app: grafana spec: containers: - - image: grafana/grafana:5.2.1 + - image: grafana/grafana:5.2.4 name: grafana ports: - containerPort: 3000 diff --git a/manifests/prometheus-prometheus.yaml b/manifests/prometheus-prometheus.yaml index 89d69c9fe3c924094d25b8ef404d79507df8901e..ae18cd67553bfc8a5191b943f400471c6257faa8 100644 --- a/manifests/prometheus-prometheus.yaml +++ b/manifests/prometheus-prometheus.yaml @@ -25,4 +25,4 @@ spec: serviceAccountName: prometheus-k8s serviceMonitorNamespaceSelector: {} serviceMonitorSelector: {} - version: v2.4.2 + version: v2.4.3 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 2f1cd4ab22203fb71f96cf7860b626438f22094b..db10ae315ca08a7e399339d6c4234b45698f1fd3 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -565,10 +565,10 @@ spec: - alert: KubePodCrashLooping annotations: message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container - }}) is restarting {{ printf "%.2f" $value }} times / second. + }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping expr: | - rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) > 0 + rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 for: 1h labels: severity: critical @@ -784,6 +784,17 @@ spec: for: 15m labels: severity: warning + - alert: CPUThrottlingHigh + annotations: + message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace + }} for {{ $labels.container_name }}.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh + expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total[5m])) by + (container_name, pod_name, namespace) \n / \nsum(increase(container_cpu_cfs_periods_total[5m])) + by (container_name, pod_name, namespace)\n > 25 \n" + for: 15m + labels: + severity: warning - name: kubernetes-storage rules: - alert: KubePersistentVolumeUsageCritical @@ -807,7 +818,13 @@ spec: days. Currently {{ $value }} bytes are available. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays expr: | - kubelet_volume_stats_available_bytes{job="kubelet"} and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 + ( + kubelet_volume_stats_used_bytes{job="kubelet"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet"} + ) > 0.85 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 for: 5m labels: severity: critical