diff --git a/README.md b/README.md index 8ea3c718c4e96b4ccf242b098dfced8876637653..3bea1c651c02f7f180eaf4324e074b93732f8f7a 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,6 @@ controller, the [node_exporter](https://github.com/prometheus/node_exporter), ```bash export KUBECONFIG=<path> # defaults to "~/.kube/config" -export KUBE_NAMESPACE=<ns> # defaults to "default" hack/cluster-monitoring/deploy ``` @@ -39,9 +38,9 @@ To tear it all down again, run: hack/cluster-monitoring/teardown ``` -__All services in the manifest still contain the `prometheus.io/scrape = true` annotations. It is not +*All services in the manifest still contain the `prometheus.io/scrape = true` annotations. It is not used by the Prometheus controller. They remain for convential deployments as in -[this example configuration](https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml).__ +[this example configuration](https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml).* ## Monitoring custom services @@ -64,4 +63,12 @@ Teardown: hack/example-service-monitoring/teardown ``` +## Roadmap + +* Incorporate [Alertmanager controller](https://github.com/coreos/kube-alertmanager-controller) +* Grafana controller that dynamically discovers and deploys dashboards from ConfigMaps +* Collection of base alerting for cluster monitoring +* KPM/Helm packages to easily provide production-ready cluster-monitoring setup (essentially contents of `hack/cluster-monitoring`) +* Add meta-monitoring to default cluster monitoring setup + diff --git a/hack/cluster-monitoring/deploy b/hack/cluster-monitoring/deploy index 4e3374309b010478a0b94d79e3ec933755eb7d10..49aa10d7520d25b1e3c304b770050832c2663c11 100755 --- a/hack/cluster-monitoring/deploy +++ b/hack/cluster-monitoring/deploy @@ -1,14 +1,23 @@ #!/usr/bin/env bash +set -o xtrace + if [ -z "${KUBECONFIG}" ]; then KUBECONFIG=~/.kube/config fi -if [ -z "${KUBE_NAMESPACE}" ]; then - KUBE_NAMESPACE=default -fi +kubectl --kubeconfig="$KUBECONFIG" create namespace monitoring + +kctl() { + kubectl --kubeconfig="$KUBECONFIG" -n "monitoring" "$@" +} + +kctl apply -f manifests/controllers/prometheus-controller.yaml + +# Wait for TPRs to be ready. +until kctl get servicemonitor; do sleep 0.1; done +until kctl get prometheus; do sleep 0.1; done -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" create -f manifests/controllers/prometheus-controller.yaml -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" create -f manifests/exporters -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" create -f manifests/grafana -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" create -f manifests/prometheus +kctl apply -f manifests/exporters +kctl apply -f manifests/grafana +kctl apply -f manifests/prometheus diff --git a/hack/cluster-monitoring/teardown b/hack/cluster-monitoring/teardown index bf1e88eff88b62c5844cbb3cab0d03e75d093beb..54b35adabf636581cb97aa24718db1182853f8a7 100755 --- a/hack/cluster-monitoring/teardown +++ b/hack/cluster-monitoring/teardown @@ -4,11 +4,15 @@ if [ -z "${KUBECONFIG}" ]; then KUBECONFIG=~/.kube/config fi -if [ -z "${KUBE_NAMESPACE}" ]; then - KUBE_NAMESPACE=default -fi - -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" delete -f manifests/exporters -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" delete -f manifests/grafana -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" delete -f manifests/prometheus -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" delete -f manifests/controllers/prometheus-controller.yaml +kctl() { + kubectl --kubeconfig="$KUBECONFIG" -n "monitoring" "$@" +} + +kctl delete -f manifests/exporters +kctl delete -f manifests/grafana +kctl delete -f manifests/prometheus + +# Hack: wait a bit to let the controller delete the deployed Prometheus server. +sleep 5 + +kctl delete -f manifests/controllers/prometheus-controller.yaml diff --git a/hack/example-service-monitoring/teardown b/hack/example-service-monitoring/teardown index 2be7dcfd4ad6d0552021c1dca8549d1ed49da8ee..448ff8a826a986c51aef796b01a657b5b7b540a3 100755 --- a/hack/example-service-monitoring/teardown +++ b/hack/example-service-monitoring/teardown @@ -4,4 +4,4 @@ if [ -z "${KUBECONFIG}" ]; then KUBECONFIG=~/.kube/config fi -kubectl --kubeconfig="$KUBECONFIG" -n "$KUBE_NAMESPACE" delete -f manifests/examples/example-app \ No newline at end of file +kubectl --kubeconfig="$KUBECONFIG" delete -f manifests/examples/example-app \ No newline at end of file diff --git a/manifests/controllers/alertmanager-controller.yaml b/manifests/controllers/alertmanager-controller.yaml index 7f5eb7ee585c73a005252c409e6b78e73827170a..65c716747615bc9edc1366caecb05356e46121e8 100644 --- a/manifests/controllers/alertmanager-controller.yaml +++ b/manifests/controllers/alertmanager-controller.yaml @@ -20,7 +20,4 @@ spec: memory: 50Mi limits: cpu: 200m - memory: 100Mi - imagePullSecrets: - - name: coreos-quay - + memory: 100Mi \ No newline at end of file diff --git a/manifests/controllers/prometheus-controller.yaml b/manifests/controllers/prometheus-controller.yaml index 7e62e605cfc06021234167df5d6a66d1bfbc5bf1..0b4092ae3cd83387d151ceeb2d5835bfb4caf549 100644 --- a/manifests/controllers/prometheus-controller.yaml +++ b/manifests/controllers/prometheus-controller.yaml @@ -20,7 +20,4 @@ spec: memory: 50Mi limits: cpu: 200m - memory: 100Mi - imagePullSecrets: - - name: coreos-quay - + memory: 100Mi \ No newline at end of file diff --git a/manifests/examples/example-app/prometheus-frontend-svc.yaml b/manifests/examples/example-app/prometheus-frontend-svc.yaml index 157d2f990ecf32b832753196803b9f8ca329ca67..8295915b99967379c0a0b33a851de87f42c50bf1 100644 --- a/manifests/examples/example-app/prometheus-frontend-svc.yaml +++ b/manifests/examples/example-app/prometheus-frontend-svc.yaml @@ -12,4 +12,5 @@ spec: protocol: TCP targetPort: web selector: - prometheus.coreos.com: prometheus-frontend + prometheus.coreos.com/type: prometheus + prometheus.coreos.com/name: prometheus-frontend diff --git a/manifests/grafana/grafana-depl.yaml b/manifests/grafana/grafana-depl.yaml index b422cf010ff0282dbea704015c6f038865410661..25d2d209d9c0c4aef6fa9b8384bf8c14f14e946c 100644 --- a/manifests/grafana/grafana-depl.yaml +++ b/manifests/grafana/grafana-depl.yaml @@ -54,6 +54,4 @@ spec: - name: grafana-dashboards configMap: name: grafana-dashboards - imagePullSecrets: - - name: coreos-quay diff --git a/manifests/grafana/grafana-svc.yaml b/manifests/grafana/grafana-svc.yaml index 11a12fc1eba8a5386879dca62dce348f5ef17680..0fd4e87bbf015f81beca8389d963f2c82753f798 100644 --- a/manifests/grafana/grafana-svc.yaml +++ b/manifests/grafana/grafana-svc.yaml @@ -7,11 +7,11 @@ metadata: annotations: prometheus.io/scrape: 'true' spec: + type: NodePort ports: - name: web port: 3000 protocol: TCP nodePort: 30902 selector: - app: grafana - type: NodePort \ No newline at end of file + app: grafana \ No newline at end of file diff --git a/manifests/prometheus/prometheus-k8s-cm.yaml b/manifests/prometheus/prometheus-k8s-cm.yaml index 0ced55767df7255a4aa8b69ac2ad419ac29882fb..09b8e0b5b3481868c1764d69865a005db1e56779 100644 --- a/manifests/prometheus/prometheus-k8s-cm.yaml +++ b/manifests/prometheus/prometheus-k8s-cm.yaml @@ -7,10 +7,6 @@ data: global: evaluation_interval: 30s - # Add your etcd scrape config here. We cannot default here as etcd is a - # prerequisite for Kubernetes. - # TODO(fabxc): potentially make this configurable via KPM in the future. - scrape_configs: - job_name: kubelets scrape_interval: 20s diff --git a/manifests/prometheus/prometheus-k8s-svc.yaml b/manifests/prometheus/prometheus-k8s-svc.yaml index c5d79d02f0787f55f3603eb9527a36a92414d59f..d738d2545fb0777bd5d0fa7881a0c80e098ec602 100644 --- a/manifests/prometheus/prometheus-k8s-svc.yaml +++ b/manifests/prometheus/prometheus-k8s-svc.yaml @@ -11,4 +11,5 @@ spec: protocol: TCP targetPort: web selector: - prometheus.coreos.com: prometheus-k8s + prometheus.coreos.com/type: prometheus + prometheus.coreos.com/name: prometheus-k8s