From 3b8e685082c403c9844895e972e4a4dd1ee3febd Mon Sep 17 00:00:00 2001 From: karancode <karan.thanvi@paypay-corp.co.jp> Date: Tue, 22 Oct 2019 02:24:07 +0900 Subject: [PATCH] add aws-eks-cni service --- examples/kustomize.jsonnet | 2 +- .../kube-prometheus-aws-eks-cni.libsonnet | 13 +++ jsonnetfile.lock.json | 14 +-- kustomization.yaml | 1 + manifests/grafana-dashboardDefinitions.yaml | 98 ++++++++++++------- manifests/prometheus-rules.yaml | 16 +++ .../prometheus-serviceMonitorAwsEksCNI.yaml | 19 ++++ 7 files changed, 119 insertions(+), 44 deletions(-) create mode 100644 jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet create mode 100644 manifests/prometheus-serviceMonitorAwsEksCNI.yaml diff --git a/examples/kustomize.jsonnet b/examples/kustomize.jsonnet index db3ba344..438e69b1 100644 --- a/examples/kustomize.jsonnet +++ b/examples/kustomize.jsonnet @@ -1,5 +1,5 @@ local kp = - (import 'kube-prometheus/kube-prometheus.libsonnet') + { + (import 'kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet') + { _config+:: { namespace: 'monitoring', }, diff --git a/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet new file mode 100644 index 00000000..e6dce38b --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-aws-eks-cni.libsonnet @@ -0,0 +1,13 @@ +local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+: { + kubePrometheusAwsEksCniMetricService: + service.new('aws-eks-cni', { 'k8s-app' : 'eks-cni' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'eks-cni' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 5d054e2b..66bfafd6 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -7,7 +7,7 @@ "directory": "jsonnet/kube-prometheus" } }, - "version": "" + "version": "aws_eks_cni" }, { "name": "ksonnet", @@ -27,7 +27,7 @@ "subdir": "" } }, - "version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3" + "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f" }, { "name": "grafonnet", @@ -37,7 +37,7 @@ "subdir": "grafonnet" } }, - "version": "69bc267211790a1c3f4ea6e6211f3e8ffe22f987" + "version": "47db72da03fc4a7a0658a87791e13c3315a3a252" }, { "name": "grafana-builder", @@ -47,7 +47,7 @@ "subdir": "grafana-builder" } }, - "version": "e59d64a96a73e65ba53ba7fe05c9598974cc4a52" + "version": "3fe9a46d5fe0b70cbcabec1d2054f8ac3b3faae7" }, { "name": "grafana", @@ -77,7 +77,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "3ef2ad8e115449a7004b628a873e2629855ed468" + "version": "5dc12f27251ad6f5f0744ad33ea7d731480f4b87" }, { "name": "prometheus", @@ -87,7 +87,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "b05b5f9a300b0209689c06d70f676291f23774c4" + "version": "b5a16a8f861c29799f9a903f1e0859f513e862ed" }, { "name": "node-mixin", @@ -107,7 +107,7 @@ "subdir": "lib/promgrafonnet" } }, - "version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3" + "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f" } ] } diff --git a/kustomization.yaml b/kustomization.yaml index a580ed8e..79f9624f 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -62,6 +62,7 @@ resources: - ./manifests/prometheus-serviceAccount.yaml - ./manifests/prometheus-serviceMonitor.yaml - ./manifests/prometheus-serviceMonitorApiserver.yaml +- ./manifests/prometheus-serviceMonitorAwsEksCNI.yaml - ./manifests/prometheus-serviceMonitorCoreDNS.yaml - ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml - ./manifests/prometheus-serviceMonitorKubeScheduler.yaml diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 1065be04..9ab00dad 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -5522,7 +5522,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5540,7 +5540,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5558,7 +5558,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5567,7 +5567,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5576,7 +5576,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5585,7 +5585,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8717,7 +8717,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(up{job=\"kubelet\"})", + "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -8801,7 +8801,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_pod_count{job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -8885,7 +8885,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_container_count{job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -8969,7 +8969,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -9053,7 +9053,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -9137,7 +9137,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(kubelet_node_config_error{job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -9217,7 +9217,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -9308,7 +9308,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -9412,7 +9412,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -9516,14 +9516,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", @@ -9614,14 +9614,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", @@ -9727,7 +9727,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -9820,7 +9820,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -9926,7 +9926,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -10030,7 +10030,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{operation_type}}", @@ -10121,7 +10121,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -10226,7 +10226,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10317,7 +10317,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10421,7 +10421,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10525,28 +10525,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -10650,7 +10650,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{verb}} {{url}}", @@ -10754,7 +10754,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"kubelet\",instance=~\"$instance\"}", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10845,7 +10845,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kubelet\",instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10936,7 +10936,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kubelet\",instance=~\"$instance\"}", + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -11020,6 +11020,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -11030,7 +11056,7 @@ items: "options": [ ], - "query": "label_values(kubelet_runtime_operations{job=\"kubelet\"}, instance)", + "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)", "refresh": 2, "regex": "", "sort": 0, diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 007b3548..63a606c4 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -79,6 +79,22 @@ spec: rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m]) ) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + - expr: | + container_memory_working_set_bytes{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_working_set_bytes + - expr: | + container_memory_rss{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_rss + - expr: | + container_memory_cache{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_cache + - expr: | + container_memory_swap{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_swap - expr: | sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace) record: namespace:container_memory_usage_bytes:sum diff --git a/manifests/prometheus-serviceMonitorAwsEksCNI.yaml b/manifests/prometheus-serviceMonitorAwsEksCNI.yaml new file mode 100644 index 00000000..07d14f1f --- /dev/null +++ b/manifests/prometheus-serviceMonitorAwsEksCNI.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: eks-cni + name: awsekscni + namespace: monitoring +spec: + endpoints: + - interval: 30s + path: /metrics + port: cni-metrics-port + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: eks-cni -- GitLab