Skip to content
Snippets Groups Projects
Unverified Commit b7fe018d authored by Maxime Brunet's avatar Maxime Brunet
Browse files

eks: Revert back to `awscni_total_ip_addresses`-based alert

parent b9c73c7b
No related branches found
No related tags found
No related merge requests found
(import '../addons/managed-cluster.libsonnet') + { (import '../addons/managed-cluster.libsonnet') + {
values+:: { values+:: {
eks: { awsVpcCni: {
minimumAvailableIPs: 10, // `minimumWarmIPs` should be less than or equal to `WARM_IP_TARGET`.
minimumAvailableIPsTime: '10m', //
// References:
// https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/docs/eni-and-ip-target.md
// https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/pkg/ipamd/ipamd.go#L61-L71
minimumWarmIPs: 10,
minimumWarmIPsTime: '10m',
}, },
}, },
kubernetesControlPlane+: { kubernetesControlPlane+: {
...@@ -17,7 +22,8 @@ ...@@ -17,7 +22,8 @@
], ],
}, },
}, },
AwsEksCniMetricService: {
serviceAwsVpcCniMetrics: {
apiVersion: 'v1', apiVersion: 'v1',
kind: 'Service', kind: 'Service',
metadata: { metadata: {
...@@ -38,14 +44,14 @@ ...@@ -38,14 +44,14 @@
}, },
}, },
serviceMonitorAwsEksCNI: { serviceMonitorAwsVpcCni: {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor', kind: 'ServiceMonitor',
metadata: { metadata: {
name: 'awsekscni', name: 'aws-node',
namespace: $.values.common.namespace, namespace: $.values.common.namespace,
labels: { labels: {
'app.kubernetes.io/name': 'eks-cni', 'app.kubernetes.io/name': 'aws-node',
}, },
}, },
spec: { spec: {
...@@ -78,30 +84,34 @@ ...@@ -78,30 +84,34 @@
], ],
}, },
}, },
prometheusRuleEksCNI: {
prometheusRuleAwsVpcCni: {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule', kind: 'PrometheusRule',
metadata: { metadata: {
labels: $.prometheus._config.commonLabels + $.prometheus._config.mixin.ruleLabels, labels: $.prometheus._config.commonLabels + $.prometheus._config.mixin.ruleLabels,
name: 'eks-rules', name: 'aws-vpc-cni-rules',
namespace: $.prometheus._config.namespace, namespace: $.prometheus._config.namespace,
}, },
spec: { spec: {
groups: [ groups: [
{ {
name: 'kube-prometheus-eks.rules', name: 'kube-prometheus-aws-vpc-cni.rules',
rules: [ rules: [
{ {
expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.eks.minimumAvailableIPs, expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.awsVpcCni.minimumWarmIPs,
labels: { labels: {
severity: 'critical', severity: 'critical',
}, },
annotations: { annotations: {
summary: 'EKS CNI is running low on available IPs', summary: 'AWS VPC CNI has a low warm IP pool',
description: 'Instance {{ $labels.instance }} has only {{ $value }} IPs available which is lower than set threshold of %s' % $.values.eks.minimumAvailableIPs, description: |||
Instance {{ $labels.instance }} has only {{ $value }} warm IPs which is lower than set threshold of %s.
It could mean the current subnet is out of available IP addresses or the CNI is unable to request them from the EC2 API.
||| % $.values.awsVpcCni.minimumWarmIPs,
}, },
'for': $.values.eks.minimumAvailableIPsTime, 'for': $.values.awsVpcCni.minimumWarmIPsTime,
alert: 'EksCNILowAvailableIPs', alert: 'AwsVpcCniWarmIPsLow',
}, },
], ],
}, },
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment