From d4b581fa49b4bac7f01b9fc37a409ddecf0a2199 Mon Sep 17 00:00:00 2001
From: Frederic Branczyk <fbranczyk@gmail.com>
Date: Wed, 14 Dec 2016 17:48:54 -0800
Subject: [PATCH] use ServiceMonitors to generate Prometheus Kubernetes config

---
 assets/prometheus/prometheus.yaml             | 85 -----------------
 assets/prometheus/rules/kubernetes.rules      |  8 +-
 hack/cluster-monitoring/deploy                |  2 +-
 hack/scripts/generate-configmaps.sh           |  3 -
 .../alertmanager/alertmanager-service.yaml    |  2 +-
 manifests/alertmanager/alertmanager.yaml      |  2 +-
 .../example-app/prometheus-frontend.yaml      |  2 +-
 .../exporters/kube-state-metrics-depl.yaml    |  2 +-
 .../exporters/kube-state-metrics-svc.yaml     |  5 +-
 manifests/exporters/node-exporter-svc.yaml    |  3 +-
 manifests/k8s/minikube/kube-apiserver.yaml    | 27 ------
 manifests/k8s/self-hosted/kube-apiserver.yaml | 16 ----
 manifests/prometheus-operator.yaml            |  5 +-
 manifests/prometheus/prometheus-k8s-cm.yaml   | 92 -------------------
 .../prometheus/prometheus-k8s-rules.yaml      |  8 +-
 .../prometheus-k8s-servicemonitor.yaml        | 69 ++++++++++++++
 manifests/prometheus/prometheus-k8s-svc.yaml  |  2 +-
 manifests/prometheus/prometheus-k8s.yaml      | 12 ++-
 18 files changed, 103 insertions(+), 242 deletions(-)
 delete mode 100644 assets/prometheus/prometheus.yaml
 delete mode 100644 manifests/k8s/minikube/kube-apiserver.yaml
 delete mode 100644 manifests/k8s/self-hosted/kube-apiserver.yaml
 delete mode 100644 manifests/prometheus/prometheus-k8s-cm.yaml
 create mode 100644 manifests/prometheus/prometheus-k8s-servicemonitor.yaml

diff --git a/assets/prometheus/prometheus.yaml b/assets/prometheus/prometheus.yaml
deleted file mode 100644
index 14decf23..00000000
--- a/assets/prometheus/prometheus.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-alerting:
-  alertmanagers:
-  - kubernetes_sd_configs:
-    - role: endpoints
-    relabel_configs:
-    - action: keep
-      regex: alertmanager-main
-      source_labels:
-      - __meta_kubernetes_service_name
-    - action: keep
-      regex: monitoring
-      source_labels:
-      - __meta_kubernetes_namespace
-    - action: keep
-      regex: web
-      source_labels:
-      - __meta_kubernetes_endpoint_port_name
-    scheme: http
-
-global:
-  scrape_interval: 15s
-  evaluation_interval: 15s
-
-rule_files:
-- /etc/prometheus/rules/*.rules
-
-scrape_configs:
-- job_name: kubelets
-  scheme: https
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    # Skip verification until we have resolved why the certificate validation
-    # for the kubelet on API server nodes fail.
-    insecure_skip_verify: true
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: node
-
-# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
-# and node-exporter, which we all consider part of a default setup.
-- job_name: standard-endpoints
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    # As for kubelets, certificate validation fails for the API server (node)
-    # and we circumvent it for now.
-    insecure_skip_verify: true
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: endpoints
-
-  relabel_configs:
-  - action: keep
-    source_labels: [__meta_kubernetes_service_name]
-    regex: prometheus|node-exporter|kube-state-metrics
-  - action: replace
-    source_labels: [__meta_kubernetes_service_name]
-    target_label: job
-
-# Scrapes the endpoint lists for the kube-dns server. Which we consider
-# part of a default setup.
-- job_name: kube-components
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: endpoints
-
-  relabel_configs:
-  - action: replace
-    source_labels: [__meta_kubernetes_service_label_k8s_app]
-    target_label: job
-  - action: keep
-    source_labels: [__meta_kubernetes_service_name]
-    regex: ".*-prometheus-discovery"
-  - action: keep
-    source_labels: [__meta_kubernetes_endpoint_port_name]
-    regex: "http-metrics.*|https-metrics.*"
-  - action: replace
-    source_labels: [__meta_kubernetes_endpoint_port_name]
-    regex: "https-metrics.*"
-    target_label: __scheme__
-    replacement: https
diff --git a/assets/prometheus/rules/kubernetes.rules b/assets/prometheus/rules/kubernetes.rules
index c0dddb92..157eb3fa 100644
--- a/assets/prometheus/rules/kubernetes.rules
+++ b/assets/prometheus/rules/kubernetes.rules
@@ -171,7 +171,7 @@ cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
   histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
 
 ALERT K8SNodeDown
-  IF up{job="kubelets"} == 0
+  IF up{job="kubelet"} == 0
   FOR 1h
   LABELS {
     service = "k8s",
@@ -226,7 +226,7 @@ ALERT K8SKubeletNodeExporterDown
   }
 
 ALERT K8SKubeletDown
-  IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
+  IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
   FOR 1h
   LABELS {
     service = "k8s",
@@ -323,7 +323,7 @@ ALERT K8SConntrackTuningMissing
   }
 
 ALERT K8STooManyOpenFiles
-  IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+  IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
   FOR 10m
   LABELS {
     service = "k8s",
@@ -335,7 +335,7 @@ ALERT K8STooManyOpenFiles
   }
 
 ALERT K8STooManyOpenFiles
-  IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+  IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
   FOR 10m
   LABELS {
     service = "k8s",
diff --git a/hack/cluster-monitoring/deploy b/hack/cluster-monitoring/deploy
index cbf0270e..c3e2667f 100755
--- a/hack/cluster-monitoring/deploy
+++ b/hack/cluster-monitoring/deploy
@@ -26,7 +26,6 @@ echo "done!"
 kctl apply -f manifests/exporters
 kctl apply -f manifests/grafana
 
-kctl apply -f manifests/prometheus/prometheus-k8s-cm.yaml
 kctl apply -f manifests/prometheus/prometheus-k8s-rules.yaml
 kctl apply -f manifests/prometheus/prometheus-k8s-svc.yaml
 
@@ -36,6 +35,7 @@ kctl apply -f manifests/alertmanager/alertmanager-service.yaml
 # `kubectl apply` is currently not working for third party resources so we are
 # using `kubectl create` here for the time being.
 # (https://github.com/kubernetes/kubernetes/issues/29542)
+kctl create -f manifests/prometheus/prometheus-k8s-servicemonitor.yaml
 kctl create -f manifests/prometheus/prometheus-k8s.yaml
 kctl create -f manifests/alertmanager/alertmanager.yaml
 
diff --git a/hack/scripts/generate-configmaps.sh b/hack/scripts/generate-configmaps.sh
index 6fb7b6e3..aa38878b 100755
--- a/hack/scripts/generate-configmaps.sh
+++ b/hack/scripts/generate-configmaps.sh
@@ -1,8 +1,5 @@
 #!/bin/bash
 
-# Generate Prometheus configuration ConfigMap
-kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml
-
 # Generate Alert Rules ConfigMap
 kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
 
diff --git a/manifests/alertmanager/alertmanager-service.yaml b/manifests/alertmanager/alertmanager-service.yaml
index 86599c35..1608d14d 100644
--- a/manifests/alertmanager/alertmanager-service.yaml
+++ b/manifests/alertmanager/alertmanager-service.yaml
@@ -11,4 +11,4 @@ spec:
     protocol: TCP
     targetPort: web
   selector:
-    alertmanager: alertmanager-main
+    alertmanager: main
diff --git a/manifests/alertmanager/alertmanager.yaml b/manifests/alertmanager/alertmanager.yaml
index ce67f3bb..fbd2d452 100644
--- a/manifests/alertmanager/alertmanager.yaml
+++ b/manifests/alertmanager/alertmanager.yaml
@@ -1,7 +1,7 @@
 apiVersion: "monitoring.coreos.com/v1alpha1"
 kind: "Alertmanager"
 metadata:
-  name: "alertmanager-main"
+  name: "main"
   labels:
     alertmanager: "main"
 spec:
diff --git a/manifests/examples/example-app/prometheus-frontend.yaml b/manifests/examples/example-app/prometheus-frontend.yaml
index 59decb14..915ded6a 100644
--- a/manifests/examples/example-app/prometheus-frontend.yaml
+++ b/manifests/examples/example-app/prometheus-frontend.yaml
@@ -6,7 +6,7 @@ metadata:
   labels:
     prometheus: frontend
 spec:
-  version: v1.4.1
+  version: v1.5.2
   serviceMonitorSelector:
     matchLabels:
       tier: frontend
diff --git a/manifests/exporters/kube-state-metrics-depl.yaml b/manifests/exporters/kube-state-metrics-depl.yaml
index b044ba2e..6ef971ce 100644
--- a/manifests/exporters/kube-state-metrics-depl.yaml
+++ b/manifests/exporters/kube-state-metrics-depl.yaml
@@ -11,7 +11,7 @@ spec:
     spec:
       containers:
       - name: kube-state-metrics
-        image: gcr.io/google_containers/kube-state-metrics:v0.3.0
+        image: gcr.io/google_containers/kube-state-metrics:v0.4.1
         ports:
         - name: metrics
           containerPort: 8080
diff --git a/manifests/exporters/kube-state-metrics-svc.yaml b/manifests/exporters/kube-state-metrics-svc.yaml
index 8b68484b..607869e1 100644
--- a/manifests/exporters/kube-state-metrics-svc.yaml
+++ b/manifests/exporters/kube-state-metrics-svc.yaml
@@ -3,10 +3,13 @@ kind: Service
 metadata:
   labels:
     app: kube-state-metrics
+    k8s-app: kube-state-metrics
+  annotations:
+    alpha.monitoring.coreos.com/non-namespaced: "true"
   name: kube-state-metrics
 spec:
   ports:
-  - name: metrics 
+  - name: http-metrics
     port: 8080
     targetPort: metrics
     protocol: TCP
diff --git a/manifests/exporters/node-exporter-svc.yaml b/manifests/exporters/node-exporter-svc.yaml
index f2d24a42..46b1a3fd 100644
--- a/manifests/exporters/node-exporter-svc.yaml
+++ b/manifests/exporters/node-exporter-svc.yaml
@@ -3,12 +3,13 @@ kind: Service
 metadata:
   labels:
     app: node-exporter
+    k8s-app: node-exporter
   name: node-exporter
 spec:
   type: ClusterIP
   clusterIP: None
   ports:
-  - name: metrics
+  - name: http-metrics
     port: 9100
     protocol: TCP
   selector:
diff --git a/manifests/k8s/minikube/kube-apiserver.yaml b/manifests/k8s/minikube/kube-apiserver.yaml
deleted file mode 100644
index 2b35a4ec..00000000
--- a/manifests/k8s/minikube/kube-apiserver.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-spec:
-  type: ClusterIP
-  clusterIP: None
-  ports:
-  - name: https-metrics
-    port: 8443
-    protocol: TCP
----
-apiVersion: v1
-kind: Endpoints
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-subsets:
-- addresses:
-  - ip: 192.168.99.100
-  ports:
-  - name: https-metrics
-    port: 8443
-    protocol: TCP
diff --git a/manifests/k8s/self-hosted/kube-apiserver.yaml b/manifests/k8s/self-hosted/kube-apiserver.yaml
deleted file mode 100644
index 72b1c08f..00000000
--- a/manifests/k8s/self-hosted/kube-apiserver.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-spec:
-  selector:
-    k8s-app: kube-apiserver
-  type: ClusterIP
-  clusterIP: None
-  ports:
-  - name: https-metrics
-    port: 443
-    targetPort: 443
-    protocol: TCP
diff --git a/manifests/prometheus-operator.yaml b/manifests/prometheus-operator.yaml
index bb1dab9a..78bc11d7 100644
--- a/manifests/prometheus-operator.yaml
+++ b/manifests/prometheus-operator.yaml
@@ -13,7 +13,10 @@ spec:
     spec:
       containers:
        - name: prometheus-operator
-         image: quay.io/coreos/prometheus-operator:v0.2.1
+         image: quay.io/coreos/prometheus-operator:v0.6.0
+         args:
+           - "--kubelet-object=kube-system/kubelet"
+           - "--config-reloader-image=quay.io/coreos/configmap-reload:latest"
          resources:
            requests:
              cpu: 100m
diff --git a/manifests/prometheus/prometheus-k8s-cm.yaml b/manifests/prometheus/prometheus-k8s-cm.yaml
deleted file mode 100644
index 5e4a9cd3..00000000
--- a/manifests/prometheus/prometheus-k8s-cm.yaml
+++ /dev/null
@@ -1,92 +0,0 @@
-apiVersion: v1
-data:
-  prometheus.yaml: |
-    alerting:
-      alertmanagers:
-      - kubernetes_sd_configs:
-        - role: endpoints
-        relabel_configs:
-        - action: keep
-          regex: alertmanager-main
-          source_labels:
-          - __meta_kubernetes_service_name
-        - action: keep
-          regex: monitoring
-          source_labels:
-          - __meta_kubernetes_namespace
-        - action: keep
-          regex: web
-          source_labels:
-          - __meta_kubernetes_endpoint_port_name
-        scheme: http
-
-    global:
-      scrape_interval: 15s
-      evaluation_interval: 15s
-
-    rule_files:
-    - /etc/prometheus/rules/*.rules
-
-    scrape_configs:
-    - job_name: kubelets
-      scheme: https
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        # Skip verification until we have resolved why the certificate validation
-        # for the kubelet on API server nodes fail.
-        insecure_skip_verify: true
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: node
-
-    # Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
-    # and node-exporter, which we all consider part of a default setup.
-    - job_name: standard-endpoints
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        # As for kubelets, certificate validation fails for the API server (node)
-        # and we circumvent it for now.
-        insecure_skip_verify: true
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: endpoints
-
-      relabel_configs:
-      - action: keep
-        source_labels: [__meta_kubernetes_service_name]
-        regex: prometheus|node-exporter|kube-state-metrics
-      - action: replace
-        source_labels: [__meta_kubernetes_service_name]
-        target_label: job
-
-    # Scrapes the endpoint lists for the kube-dns server. Which we consider
-    # part of a default setup.
-    - job_name: kube-components
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: endpoints
-
-      relabel_configs:
-      - action: replace
-        source_labels: [__meta_kubernetes_service_label_k8s_app]
-        target_label: job
-      - action: keep
-        source_labels: [__meta_kubernetes_service_name]
-        regex: ".*-prometheus-discovery"
-      - action: keep
-        source_labels: [__meta_kubernetes_endpoint_port_name]
-        regex: "http-metrics.*|https-metrics.*"
-      - action: replace
-        source_labels: [__meta_kubernetes_endpoint_port_name]
-        regex: "https-metrics.*"
-        target_label: __scheme__
-        replacement: https
-kind: ConfigMap
-metadata:
-  creationTimestamp: null
-  name: prometheus-k8s
diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml
index 6e83500e..08f6dddc 100644
--- a/manifests/prometheus/prometheus-k8s-rules.yaml
+++ b/manifests/prometheus/prometheus-k8s-rules.yaml
@@ -226,7 +226,7 @@ data:
       histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
 
     ALERT K8SNodeDown
-      IF up{job="kubelets"} == 0
+      IF up{job="kubelet"} == 0
       FOR 1h
       LABELS {
         service = "k8s",
@@ -281,7 +281,7 @@ data:
       }
 
     ALERT K8SKubeletDown
-      IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
+      IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
       FOR 1h
       LABELS {
         service = "k8s",
@@ -378,7 +378,7 @@ data:
       }
 
     ALERT K8STooManyOpenFiles
-      IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+      IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
       FOR 10m
       LABELS {
         service = "k8s",
@@ -390,7 +390,7 @@ data:
       }
 
     ALERT K8STooManyOpenFiles
-      IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+      IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
       FOR 10m
       LABELS {
         service = "k8s",
diff --git a/manifests/prometheus/prometheus-k8s-servicemonitor.yaml b/manifests/prometheus/prometheus-k8s-servicemonitor.yaml
new file mode 100644
index 00000000..dbad7e5f
--- /dev/null
+++ b/manifests/prometheus/prometheus-k8s-servicemonitor.yaml
@@ -0,0 +1,69 @@
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: kube-apiserver
+  labels:
+    k8s-apps: https
+spec:
+  jobLabel: provider
+  selector:
+    matchLabels:
+      component: apiserver
+      provider: kubernetes
+  namespaceSelector:
+    matchNames:
+    - default
+  endpoints:
+  - port: https
+    interval: 15s
+    scheme: https
+    tlsConfig:
+      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+      insecureSkipVerify: true
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+---
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: k8s-apps-https
+  labels:
+    k8s-apps: https
+spec:
+  jobLabel: k8s-app
+  selector:
+    matchExpressions:
+    - {key: k8s-app, operator: Exists}
+  namespaceSelector:
+    matchNames:
+    - kube-system
+  endpoints:
+  - port: https-metrics
+    interval: 15s
+    scheme: https
+    tlsConfig:
+      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+      insecureSkipVerify: true
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+---
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: k8s-apps-http
+  labels:
+    k8s-apps: http
+spec:
+  jobLabel: k8s-app
+  selector:
+    matchExpressions:
+    - {key: k8s-app, operator: Exists}
+  namespaceSelector:
+    matchNames:
+    - kube-system
+    - monitoring
+  endpoints:
+  - port: http-metrics
+    interval: 15s
+  - port: http-metrics-dnsmasq
+    interval: 15s
+  - port: http-metrics-skydns
+    interval: 15s
diff --git a/manifests/prometheus/prometheus-k8s-svc.yaml b/manifests/prometheus/prometheus-k8s-svc.yaml
index d3d25d2b..a558f30f 100644
--- a/manifests/prometheus/prometheus-k8s-svc.yaml
+++ b/manifests/prometheus/prometheus-k8s-svc.yaml
@@ -11,4 +11,4 @@ spec:
     protocol: TCP
     targetPort: web
   selector:
-    prometheus: prometheus-k8s
+    prometheus: k8s
diff --git a/manifests/prometheus/prometheus-k8s.yaml b/manifests/prometheus/prometheus-k8s.yaml
index 6ed1fe90..a593f041 100644
--- a/manifests/prometheus/prometheus-k8s.yaml
+++ b/manifests/prometheus/prometheus-k8s.yaml
@@ -1,11 +1,14 @@
 apiVersion: monitoring.coreos.com/v1alpha1
 kind: Prometheus
 metadata:
-  name: prometheus-k8s
+  name: k8s
   labels:
     prometheus: k8s
 spec:
-  version: v1.4.1
+  version: v1.5.2
+  serviceMonitorSelector:
+    matchExpressions:
+    - {key: k8s-apps, operator: Exists}
   resources:
     requests:
       # 2Gi is default, but won't schedule if you don't have a node with >2Gi
@@ -13,3 +16,8 @@ spec:
       # production use. This value is mainly meant for demonstration/testing
       # purposes.
       memory: 400Mi
+  alerting:
+    alertmanagers:
+    - namespace: monitoring
+      name: alertmanager-main
+      port: web
-- 
GitLab