From 3b8e685082c403c9844895e972e4a4dd1ee3febd Mon Sep 17 00:00:00 2001
From: karancode <karan.thanvi@paypay-corp.co.jp>
Date: Tue, 22 Oct 2019 02:24:07 +0900
Subject: [PATCH] add aws-eks-cni service

---
 examples/kustomize.jsonnet                    |  2 +-
 .../kube-prometheus-aws-eks-cni.libsonnet     | 13 +++
 jsonnetfile.lock.json                         | 14 +--
 kustomization.yaml                            |  1 +
 manifests/grafana-dashboardDefinitions.yaml   | 98 ++++++++++++-------
 manifests/prometheus-rules.yaml               | 16 +++
 .../prometheus-serviceMonitorAwsEksCNI.yaml   | 19 ++++
 7 files changed, 119 insertions(+), 44 deletions(-)
 create mode 100644 jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet
 create mode 100644 manifests/prometheus-serviceMonitorAwsEksCNI.yaml

diff --git a/examples/kustomize.jsonnet b/examples/kustomize.jsonnet
index db3ba344..438e69b1 100644
--- a/examples/kustomize.jsonnet
+++ b/examples/kustomize.jsonnet
@@ -1,5 +1,5 @@
 local kp =
-  (import 'kube-prometheus/kube-prometheus.libsonnet') + {
+  (import 'kube-prometheus/kube-prometheus.libsonnet') + (import 'kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet') + {
     _config+:: {
       namespace: 'monitoring',
     },
diff --git a/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet
new file mode 100644
index 00000000..e6dce38b
--- /dev/null
+++ b/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet
@@ -0,0 +1,13 @@
+local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
+local service = k.core.v1.service;
+local servicePort = k.core.v1.service.mixin.spec.portsType;
+// Headless kube-system Service for port 61678; selects pods by k8s-app=aws-node (stock AWS VPC CNI DaemonSet label) and carries k8s-app=eks-cni itself.
+{
+  prometheus+: {
+    kubePrometheusAwsEksCniMetricService:
+      service.new('aws-eks-cni', { 'k8s-app': 'aws-node' }, servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
+      service.mixin.metadata.withNamespace('kube-system') +
+      service.mixin.metadata.withLabels({ 'k8s-app': 'eks-cni' }) +
+      service.mixin.spec.withClusterIp('None'),
+  },
+}
diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json
index 5d054e2b..66bfafd6 100644
--- a/jsonnetfile.lock.json
+++ b/jsonnetfile.lock.json
@@ -7,7 +7,7 @@
                     "directory": "jsonnet/kube-prometheus"
                 }
             },
-            "version": ""
+            "version": "aws_eks_cni"
         },
         {
             "name": "ksonnet",
@@ -27,7 +27,7 @@
                     "subdir": ""
                 }
             },
-            "version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3"
+            "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f"
         },
         {
             "name": "grafonnet",
@@ -37,7 +37,7 @@
                     "subdir": "grafonnet"
                 }
             },
-            "version": "69bc267211790a1c3f4ea6e6211f3e8ffe22f987"
+            "version": "47db72da03fc4a7a0658a87791e13c3315a3a252"
         },
         {
             "name": "grafana-builder",
@@ -47,7 +47,7 @@
                     "subdir": "grafana-builder"
                 }
             },
-            "version": "e59d64a96a73e65ba53ba7fe05c9598974cc4a52"
+            "version": "3fe9a46d5fe0b70cbcabec1d2054f8ac3b3faae7"
         },
         {
             "name": "grafana",
@@ -77,7 +77,7 @@
                     "subdir": "Documentation/etcd-mixin"
                 }
             },
-            "version": "3ef2ad8e115449a7004b628a873e2629855ed468"
+            "version": "5dc12f27251ad6f5f0744ad33ea7d731480f4b87"
         },
         {
             "name": "prometheus",
@@ -87,7 +87,7 @@
                     "subdir": "documentation/prometheus-mixin"
                 }
             },
-            "version": "b05b5f9a300b0209689c06d70f676291f23774c4"
+            "version": "b5a16a8f861c29799f9a903f1e0859f513e862ed"
         },
         {
             "name": "node-mixin",
@@ -107,7 +107,7 @@
                     "subdir": "lib/promgrafonnet"
                 }
             },
-            "version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3"
+            "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f"
         }
     ]
 }
diff --git a/kustomization.yaml b/kustomization.yaml
index a580ed8e..79f9624f 100644
--- a/kustomization.yaml
+++ b/kustomization.yaml
@@ -62,6 +62,7 @@ resources:
 - ./manifests/prometheus-serviceAccount.yaml
 - ./manifests/prometheus-serviceMonitor.yaml
 - ./manifests/prometheus-serviceMonitorApiserver.yaml
+- ./manifests/prometheus-serviceMonitorAwsEksCNI.yaml
 - ./manifests/prometheus-serviceMonitorCoreDNS.yaml
 - ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml
 - ./manifests/prometheus-serviceMonitorKubeScheduler.yaml
diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml
index 1065be04..9ab00dad 100644
--- a/manifests/grafana-dashboardDefinitions.yaml
+++ b/manifests/grafana-dashboardDefinitions.yaml
@@ -5522,7 +5522,7 @@ items:
                           ],
                           "targets": [
                               {
-                                  "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -5540,7 +5540,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -5558,7 +5558,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -5567,7 +5567,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -5576,7 +5576,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -5585,7 +5585,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
+                                  "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -8717,7 +8717,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(up{job=\"kubelet\"})",
+                                  "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "",
@@ -8801,7 +8801,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(kubelet_running_pod_count{job=\"kubelet\", instance=~\"$instance\"})",
+                                  "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -8885,7 +8885,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(kubelet_running_container_count{job=\"kubelet\", instance=~\"$instance\"})",
+                                  "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -8969,7 +8969,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+                                  "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -9053,7 +9053,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
+                                  "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -9137,7 +9137,7 @@ items:
                           "tableColumn": "",
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_node_config_error{job=\"kubelet\", instance=~\"$instance\"}[5m]))",
+                                  "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -9217,7 +9217,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_runtime_operations_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
+                                  "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_type}}",
@@ -9308,7 +9308,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_runtime_operations_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+                                  "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_type}}",
@@ -9412,7 +9412,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_type}}",
@@ -9516,14 +9516,14 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
+                                  "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} pod",
                                   "refId": "A"
                               },
                               {
-                                  "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
+                                  "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} worker",
@@ -9614,14 +9614,14 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} pod",
                                   "refId": "A"
                               },
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} worker",
@@ -9727,7 +9727,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(storage_operation_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+                                  "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@@ -9820,7 +9820,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(storage_operation_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+                                  "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@@ -9926,7 +9926,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@@ -10030,7 +10030,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+                                  "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{operation_type}}",
@@ -10121,7 +10121,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{operation_type}}",
@@ -10226,7 +10226,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)",
+                                  "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -10317,7 +10317,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -10421,7 +10421,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -10525,28 +10525,28 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+                                  "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "2xx",
                                   "refId": "A"
                               },
                               {
-                                  "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+                                  "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "3xx",
                                   "refId": "B"
                               },
                               {
-                                  "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+                                  "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "4xx",
                                   "refId": "C"
                               },
                               {
-                                  "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+                                  "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "5xx",
@@ -10650,7 +10650,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
+                                  "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}} {{verb}} {{url}}",
@@ -10754,7 +10754,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "process_resident_memory_bytes{job=\"kubelet\",instance=~\"$instance\"}",
+                                  "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -10845,7 +10845,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "rate(process_cpu_seconds_total{job=\"kubelet\",instance=~\"$instance\"}[5m])",
+                                  "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -10936,7 +10936,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "go_goroutines{job=\"kubelet\",instance=~\"$instance\"}",
+                                  "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{instance}}",
@@ -11020,6 +11020,32 @@ items:
                       "allValue": null,
                       "current": {
 
+                      },
+                      "datasource": "$datasource",
+                      "hide": 2,
+                      "includeAll": false,
+                      "label": "cluster",
+                      "multi": false,
+                      "name": "cluster",
+                      "options": [
+
+                      ],
+                      "query": "label_values(kube_pod_info, cluster)",
+                      "refresh": 2,
+                      "regex": "",
+                      "sort": 0,
+                      "tagValuesQuery": "",
+                      "tags": [
+
+                      ],
+                      "tagsQuery": "",
+                      "type": "query",
+                      "useTags": false
+                  },
+                  {
+                      "allValue": null,
+                      "current": {
+
                       },
                       "datasource": "$datasource",
                       "hide": 0,
@@ -11030,7 +11056,7 @@ items:
                       "options": [
 
                       ],
-                      "query": "label_values(kubelet_runtime_operations{job=\"kubelet\"}, instance)",
+                      "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
                       "refresh": 2,
                       "regex": "",
                       "sort": 0,
diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml
index 007b3548..63a606c4 100644
--- a/manifests/prometheus-rules.yaml
+++ b/manifests/prometheus-rules.yaml
@@ -79,6 +79,22 @@ spec:
           rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
         ) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
       record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
+    - expr: |
+        container_memory_working_set_bytes{job="kubelet", image!=""}
+        * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
+      record: node_namespace_pod_container:container_memory_working_set_bytes
+    - expr: |
+        container_memory_rss{job="kubelet", image!=""}
+        * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
+      record: node_namespace_pod_container:container_memory_rss
+    - expr: |
+        container_memory_cache{job="kubelet", image!=""}
+        * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
+      record: node_namespace_pod_container:container_memory_cache
+    - expr: |
+        container_memory_swap{job="kubelet", image!=""}
+        * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
+      record: node_namespace_pod_container:container_memory_swap
     - expr: |
         sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
       record: namespace:container_memory_usage_bytes:sum
diff --git a/manifests/prometheus-serviceMonitorAwsEksCNI.yaml b/manifests/prometheus-serviceMonitorAwsEksCNI.yaml
new file mode 100644
index 00000000..07d14f1f
--- /dev/null
+++ b/manifests/prometheus-serviceMonitorAwsEksCNI.yaml
@@ -0,0 +1,19 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    k8s-app: eks-cni
+  name: awsekscni
+  namespace: monitoring
+spec:
+  endpoints:
+  - interval: 30s
+    path: /metrics
+    port: cni-metrics-port
+  jobLabel: k8s-app
+  namespaceSelector:
+    matchNames:
+    - kube-system
+  selector:
+    matchLabels:
+      k8s-app: eks-cni
-- 
GitLab