diff --git a/examples/weave-net-example.jsonnet b/examples/weave-net-example.jsonnet index 9d708761bc7008134abd7d9f28b7cd0a001462ab..a7eca242c2402e53190d6d8906ed87785c519480 100644 --- a/examples/weave-net-example.jsonnet +++ b/examples/weave-net-example.jsonnet @@ -1,12 +1,12 @@ local kp = (import 'kube-prometheus/main.libsonnet') + - (import 'kube-prometheus/addons/weave-net.libsonnet') + { + (import 'kube-prometheus/addons/weave-net/weave-net.libsonnet') + { values+:: { common+: { namespace: 'monitoring', }, }, prometheus+: { - prometheusRule+: { + prometheusRuleWeaveNet+: { spec+: { groups: std.map( function(group) diff --git a/jsonnet/kube-prometheus/addons/weave-net.libsonnet b/jsonnet/kube-prometheus/addons/weave-net.libsonnet deleted file mode 100644 index 0755e60c83143931fb6861b34db7ca87e25ea68f..0000000000000000000000000000000000000000 --- a/jsonnet/kube-prometheus/addons/weave-net.libsonnet +++ /dev/null @@ -1,196 +0,0 @@ -{ - prometheus+: { - serviceWeaveNet: { - apiVersion: 'v1', - kind: 'Service', - metadata: { - name: 'weave-net', - namespace: 'kube-system', - labels: { 'app.kubernetes.io/name': 'weave-net' }, - }, - spec: { - ports: [ - { name: 'weave-net-metrics', targetPort: 6782, port: 6782 }, - ], - selector: { name: 'weave-net' }, - clusterIP: 'None', - }, - }, - serviceMonitorWeaveNet: { - apiVersion: 'monitoring.coreos.com/v1', - kind: 'ServiceMonitor', - metadata: { - name: 'weave-net', - labels: { - 'app.kubernetes.io/name': 'weave-net', - }, - namespace: 'monitoring', - }, - spec: { - jobLabel: 'app.kubernetes.io/name', - endpoints: [ - { - port: 'weave-net-metrics', - path: '/metrics', - interval: '15s', - }, - ], - namespaceSelector: { - matchNames: [ - 'kube-system', - ], - }, - selector: { - matchLabels: { - 'app.kubernetes.io/name': 'weave-net', - }, - }, - }, - }, - }, - prometheusRules+: { - groups+: [ - { - name: 'weave-net', - rules: [ - { - alert: 'WeaveNetIPAMSplitBrain', - expr: 'max(weave_ipam_unreachable_percentage) - min(weave_ipam_unreachable_percentage) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Percentage of all IP addresses owned by unreachable peers is not same for every node.', - description: 'actionable: Weave Net network has a split brain problem. Please find the problem and fix it.', - }, - }, - { - alert: 'WeaveNetIPAMUnreachable', - expr: 'weave_ipam_unreachable_percentage > 25', - 'for': '10m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Percentage of all IP addresses owned by unreachable peers is above threshold.', - description: 'actionable: Please find the problem and fix it.', - }, - }, - { - alert: 'WeaveNetIPAMPendingAllocates', - expr: 'sum(weave_ipam_pending_allocates) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Number of pending allocates is above the threshold.', - description: 'actionable: Please find the problem and fix it.', - }, - }, - { - alert: 'WeaveNetIPAMPendingClaims', - expr: 'sum(weave_ipam_pending_claims) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Number of pending claims is above the threshold.', - description: 'actionable: Please find the problem and fix it.', - }, - }, - { - alert: 'WeaveNetFastDPFlowsLow', - expr: 'sum(weave_flows) < 15000', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Number of FastDP flows is below the threshold.', - description: 'actionable: Please find the reason for FastDP flows to go below the threshold and fix it.', - }, - }, - { - alert: 'WeaveNetFastDPFlowsOff', - expr: 'sum(weave_flows == bool 0) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'FastDP flows is zero.', - description: 'actionable: Please find the reason for FastDP flows to be off and fix it.', - }, - }, - { - alert: 'WeaveNetHighConnectionTerminationRate', - expr: 'rate(weave_connection_terminations_total[5m]) > 0.1', - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'A lot of connections are getting terminated.', - description: 'actionable: Please find the reason for the high connection termination rate and fix it.', - }, - }, - { - alert: 'WeaveNetConnectionsConnecting', - expr: 'sum(weave_connections{state="connecting"}) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'A lot of connections are in connecting state.', - description: 'actionable: Please find the reason for this and fix it.', - }, - }, - { - alert: 'WeaveNetConnectionsRetying', - expr: 'sum(weave_connections{state="retrying"}) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'A lot of connections are in retrying state.', - description: 'actionable: Please find the reason for this and fix it.', - }, - }, - { - alert: 'WeaveNetConnectionsPending', - expr: 'sum(weave_connections{state="pending"}) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'A lot of connections are in pending state.', - description: 'actionable: Please find the reason for this and fix it.', - }, - }, - { - alert: 'WeaveNetConnectionsFailed', - expr: 'sum(weave_connections{state="failed"}) > 0', - 'for': '3m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'A lot of connections are in failed state.', - description: 'actionable: Please find the reason and fix it.', - }, - }, - ], - }, - ], - }, - grafanaDashboards+:: { - 'weave-net.json': (import './grafana-weave-net.json'), - 'weave-net-cluster.json': (import './grafana-weave-net-cluster.json'), - }, -} diff --git a/jsonnet/kube-prometheus/addons/weave-net/alerts.libsonnet b/jsonnet/kube-prometheus/addons/weave-net/alerts.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c0ca940a9ca7260f99c0e2cacbbdec3ce1d5b71a --- /dev/null +++ b/jsonnet/kube-prometheus/addons/weave-net/alerts.libsonnet @@ -0,0 +1,134 @@ +[ + { + alert: 'WeaveNetIPAMSplitBrain', + expr: 'max(weave_ipam_unreachable_percentage) - min(weave_ipam_unreachable_percentage) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Percentage of all IP addresses owned by unreachable peers is not same for every node.', + description: 'actionable: Weave Net network has a split brain problem. Please find the problem and fix it.', + }, + }, + { + alert: 'WeaveNetIPAMUnreachable', + expr: 'weave_ipam_unreachable_percentage > 25', + 'for': '10m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Percentage of all IP addresses owned by unreachable peers is above threshold.', + description: 'actionable: Please find the problem and fix it.', + }, + }, + { + alert: 'WeaveNetIPAMPendingAllocates', + expr: 'sum(weave_ipam_pending_allocates) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Number of pending allocates is above the threshold.', + description: 'actionable: Please find the problem and fix it.', + }, + }, + { + alert: 'WeaveNetIPAMPendingClaims', + expr: 'sum(weave_ipam_pending_claims) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Number of pending claims is above the threshold.', + description: 'actionable: Please find the problem and fix it.', + }, + }, + { + alert: 'WeaveNetFastDPFlowsLow', + expr: 'sum(weave_flows) < 15000', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Number of FastDP flows is below the threshold.', + description: 'actionable: Please find the reason for FastDP flows to go below the threshold and fix it.', + }, + }, + { + alert: 'WeaveNetFastDPFlowsOff', + expr: 'sum(weave_flows == bool 0) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'FastDP flows is zero.', + description: 'actionable: Please find the reason for FastDP flows to be off and fix it.', + }, + }, + { + alert: 'WeaveNetHighConnectionTerminationRate', + expr: 'rate(weave_connection_terminations_total[5m]) > 0.1', + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'A lot of connections are getting terminated.', + description: 'actionable: Please find the reason for the high connection termination rate and fix it.', + }, + }, + { + alert: 'WeaveNetConnectionsConnecting', + expr: 'sum(weave_connections{state="connecting"}) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'A lot of connections are in connecting state.', + description: 'actionable: Please find the reason for this and fix it.', + }, + }, + { + alert: 'WeaveNetConnectionsRetying', + expr: 'sum(weave_connections{state="retrying"}) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'A lot of connections are in retrying state.', + description: 'actionable: Please find the reason for this and fix it.', + }, + }, + { + alert: 'WeaveNetConnectionsPending', + expr: 'sum(weave_connections{state="pending"}) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'A lot of connections are in pending state.', + description: 'actionable: Please find the reason for this and fix it.', + }, + }, + { + alert: 'WeaveNetConnectionsFailed', + expr: 'sum(weave_connections{state="failed"}) > 0', + 'for': '3m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'A lot of connections are in failed state.', + description: 'actionable: Please find the reason and fix it.', + }, + }, +] diff --git a/jsonnet/kube-prometheus/addons/grafana-weave-net-cluster.json b/jsonnet/kube-prometheus/addons/weave-net/grafana-weave-net-cluster.json similarity index 100% rename from jsonnet/kube-prometheus/addons/grafana-weave-net-cluster.json rename to jsonnet/kube-prometheus/addons/weave-net/grafana-weave-net-cluster.json diff --git a/jsonnet/kube-prometheus/addons/grafana-weave-net.json b/jsonnet/kube-prometheus/addons/weave-net/grafana-weave-net.json similarity index 100% rename from jsonnet/kube-prometheus/addons/grafana-weave-net.json rename to jsonnet/kube-prometheus/addons/weave-net/grafana-weave-net.json diff --git a/jsonnet/kube-prometheus/addons/weave-net/weave-net.libsonnet b/jsonnet/kube-prometheus/addons/weave-net/weave-net.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..71596f4568aac7fcd46869dd3f84c8406ef9fd2d --- /dev/null +++ b/jsonnet/kube-prometheus/addons/weave-net/weave-net.libsonnet @@ -0,0 +1,73 @@ +{ + prometheus+: { + local p = self, + serviceWeaveNet: { + apiVersion: 'v1', + kind: 'Service', + metadata: { + name: 'weave-net', + namespace: 'kube-system', + labels: { 'app.kubernetes.io/name': 'weave-net' }, + }, + spec: { + ports: [ + { name: 'weave-net-metrics', targetPort: 6782, port: 6782 }, + ], + selector: { name: 'weave-net' }, + clusterIP: 'None', + }, + }, + serviceMonitorWeaveNet: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'weave-net', + labels: { + 'app.kubernetes.io/name': 'weave-net', + }, + namespace: 'monitoring', + }, + spec: { + jobLabel: 'app.kubernetes.io/name', + endpoints: [ + { + port: 'weave-net-metrics', + path: '/metrics', + interval: '15s', + }, + ], + namespaceSelector: { + matchNames: [ + 'kube-system', + ], + }, + selector: { + matchLabels: { + 'app.kubernetes.io/name': 'weave-net', + }, + }, + }, + }, + prometheusRuleWeaveNet: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'PrometheusRule', + metadata: { + labels: p.config.mixin.ruleLabels, + name: 'weave-net-rules', + namespace: p.config.namespace, + }, + spec: { + groups: [{ + name: 'weave-net', + rules: (import './alerts.libsonnet'), + }], + }, + }, + mixin+:: { + grafanaDashboards+:: { + 'weave-net.json': (import './grafana-weave-net.json'), + 'weave-net-cluster.json': (import './grafana-weave-net-cluster.json'), + }, + }, + }, +}