From b867f6c9ec6a776243a82231b2ecd084e6c4ecfb Mon Sep 17 00:00:00 2001 From: Frederic Branczyk <fbranczyk@gmail.com> Date: Mon, 12 Dec 2016 22:24:52 -0800 Subject: [PATCH] correctly discover the Alertmanager cluster and ServiceMonitors --- README.md | 6 +++++- hack/cluster-monitoring/deploy | 1 + hack/cluster-monitoring/teardown | 1 + .../example-app/prometheus-frontend.yaml | 7 +++---- manifests/prometheus/prometheus-k8s-cm.yaml | 19 +++++++++++++++++++ manifests/prometheus/prometheus-k8s.yaml | 5 ----- 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 74df0c20..a751a4a9 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ which manages Prometheus servers and their configuration in a cluster. With a si * A Prometheus configuration covering monitoring of all Kubernetes core components and exporters * A default set of alerting rules on the cluster component's health * A Grafana instance serving dashboards on cluster metrics +* A three node highly available Alertmanager cluster Simply run: @@ -35,6 +36,7 @@ hack/cluster-monitoring/deploy After all pods are ready, you can reach: * Prometheus UI on node port `30900` +* Alertmanager UI on node port `30903` * Grafana on node port `30902` To tear it all down again, run: @@ -57,7 +59,9 @@ hack/example-service-monitoring/deploy ``` After all pods are ready you can reach the Prometheus server on node port `30100` and observe -how it monitors the service as specified. +how it monitors the service as specified. Same as before, this Prometheus server automatically +discovers the Alertmanager cluster deployed in the [Monitoring Kubernetes](#Monitoring-Kubernetes) +section. Teardown: diff --git a/hack/cluster-monitoring/deploy b/hack/cluster-monitoring/deploy index 6da8cd62..a096747e 100755 --- a/hack/cluster-monitoring/deploy +++ b/hack/cluster-monitoring/deploy @@ -19,3 +19,4 @@ until kctl get prometheus; do sleep 1; done kctl apply -f manifests/exporters kctl apply -f manifests/grafana kctl apply -f manifests/prometheus +kctl apply -f manifests/alertmanager diff --git a/hack/cluster-monitoring/teardown b/hack/cluster-monitoring/teardown index a6edb0ae..afa4ce14 100755 --- a/hack/cluster-monitoring/teardown +++ b/hack/cluster-monitoring/teardown @@ -11,6 +11,7 @@ kctl() { kctl delete -f manifests/exporters kctl delete -f manifests/grafana kctl delete -f manifests/prometheus +kctl delete -f manifests/alertmanager # Hack: wait a bit to let the controller delete the deployed Prometheus server. sleep 5 diff --git a/manifests/examples/example-app/prometheus-frontend.yaml b/manifests/examples/example-app/prometheus-frontend.yaml index 80fd9e04..59decb14 100644 --- a/manifests/examples/example-app/prometheus-frontend.yaml +++ b/manifests/examples/example-app/prometheus-frontend.yaml @@ -7,10 +7,9 @@ metadata: prometheus: frontend spec: version: v1.4.1 - serviceMonitors: - - selector: - matchLabels: - tier: frontend + serviceMonitorSelector: + matchLabels: + tier: frontend resources: requests: # 2Gi is default, but won't schedule if you don't have a node with >2Gi diff --git a/manifests/prometheus/prometheus-k8s-cm.yaml b/manifests/prometheus/prometheus-k8s-cm.yaml index a8846b92..f6d61cdd 100644 --- a/manifests/prometheus/prometheus-k8s-cm.yaml +++ b/manifests/prometheus/prometheus-k8s-cm.yaml @@ -1,6 +1,25 @@ apiVersion: v1 data: prometheus.yaml: | + alerting: + alertmanagers: + - kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: alertmanager-main + source_labels: + - __meta_kubernetes_service_name + - action: keep + regex: monitoring + source_labels: + - __meta_kubernetes_namespace + - action: keep + regex: web + source_labels: + - __meta_kubernetes_endpoint_port_name + scheme: http + global: scrape_interval: 15s evaluation_interval: 15s diff --git a/manifests/prometheus/prometheus-k8s.yaml b/manifests/prometheus/prometheus-k8s.yaml index 602ff146..6ed1fe90 100644 --- a/manifests/prometheus/prometheus-k8s.yaml +++ b/manifests/prometheus/prometheus-k8s.yaml @@ -13,8 +13,3 @@ spec: # production use. This value is mainly meant for demonstration/testing # purposes. memory: 400Mi - alerting: - alertmanagers: - - namespace: monitoring - name: alertmanager-main - port: web -- GitLab