diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 5c6929a470c3fea0610a0819e8cff3c352ba1b2a..86630edb3833cf8a9c07b2f22e944f6593cc7e31 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -28,7 +28,7 @@ "subdir": "" } }, - "version": "4c23c06fff9ef50744f5ed306c9ab0c4bd78a144" + "version": "2e358ff68f00bd0dead547beaddc6ce7526864e8" }, { "name": "grafonnet", diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 4900caaece75f1690d21db20d2753824c0ed7c47..dc491cb6f374caf54f9185c98c773a297dc65cec 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -6730,7 +6730,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", + "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ container_name }}", @@ -6833,7 +6833,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", pod_name=\"$pod\"}[1m])))", + "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ pod_name }}", diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 05e0debf10d0392cfde9e0f9ea3763ccbec94383..e578dad05cabb4f73eea8135a448ae3607bf0042 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -626,8 +626,8 @@ spec: }} for container {{ $labels.container_name }} in pod {{ $labels.pod_name }}.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh - expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{}[5m])) - by (container_name, pod_name, namespace) \n / \nsum(increase(container_cpu_cfs_periods_total{}[5m])) + expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!=\"\", + }[5m])) by (container_name, pod_name, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)\n > 25 \n" for: 15m labels: