diff --git a/assets/prometheus/prometheus.yaml b/assets/prometheus/prometheus.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0c6cb0e67129b76965c248433524cd0afe0829e --- /dev/null +++ b/assets/prometheus/prometheus.yaml @@ -0,0 +1,68 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +rule_files: +- /etc/prometheus/rules/*.rules + +scrape_configs: +- job_name: kubelets + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # Skip verification until we have resolved why the certificate validation + # for the kubelet on API server nodes fails. + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + +# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics, +# node-exporter, and etcd-k8s, all of which we consider part of a default setup. +- job_name: standard-endpoints + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # As for kubelets, certificate validation fails for the API server (node) + # and we circumvent it for now. + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - action: keep + source_labels: [__meta_kubernetes_service_name] + regex: kubernetes|node-exporter|kube-state-metrics|etcd-k8s + - action: replace + source_labels: [__meta_kubernetes_service_name] + target_label: job + - action: replace + source_labels: [__meta_kubernetes_service_name] + regex: kubernetes + target_label: __scheme__ + replacement: https + +# Scrapes the endpoints of services named kube-*-prometheus-discovery +# (e.g. kube-dns), which we consider part of a default setup. 
+- job_name: kube-components + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - action: replace + source_labels: [__meta_kubernetes_service_name] + target_label: job + regex: "kube-(.*)-prometheus-discovery" + replacement: "kube-${1}" + - action: keep + source_labels: [__meta_kubernetes_service_name] + regex: "kube-(.*)-prometheus-discovery" + - action: keep + source_labels: [__meta_kubernetes_endpoint_port_name] + regex: "prometheus" diff --git a/assets/alerts/etcd2.rules b/assets/prometheus/rules/etcd2.rules similarity index 100% rename from assets/alerts/etcd2.rules rename to assets/prometheus/rules/etcd2.rules diff --git a/assets/alerts/kubernetes.rules b/assets/prometheus/rules/kubernetes.rules similarity index 100% rename from assets/alerts/kubernetes.rules rename to assets/prometheus/rules/kubernetes.rules diff --git a/hack/scripts/generate-configmaps.sh b/hack/scripts/generate-configmaps.sh index a178878cfd14545c9eb3f95e0353908251b3b4ff..50f3c9033ec05921b8475cc9517fd1926b76e8bd 100755 --- a/hack/scripts/generate-configmaps.sh +++ b/hack/scripts/generate-configmaps.sh @@ -1,7 +1,10 @@ #!/bin/bash +# Generate Prometheus configuration ConfigMap +kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml + # Generate Alert Rules ConfigMap -kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/alerts/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml +kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml # Generate Dashboard ConfigMap kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-cm.yaml diff 
--git a/manifests/prometheus/prometheus-k8s-cm.yaml b/manifests/prometheus/prometheus-k8s-cm.yaml index 73389f517a46e2596ae168bb7752874a2dac4268..16bf02ef8aa40a6f589cdbebcf198c07cafbadd5 100644 --- a/manifests/prometheus/prometheus-k8s-cm.yaml +++ b/manifests/prometheus/prometheus-k8s-cm.yaml @@ -1,18 +1,15 @@ apiVersion: v1 -kind: ConfigMap -metadata: - name: prometheus-k8s data: prometheus.yaml: | global: - evaluation_interval: 30s + scrape_interval: 15s + evaluation_interval: 15s rule_files: - - /etc/prometheus/rules/*.rules + - /etc/prometheus/rules/*.rules scrape_configs: - job_name: kubelets - scrape_interval: 20s scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt @@ -27,7 +24,6 @@ data: # Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics, # and node-exporter, which we all consider part of a default setup. - job_name: standard-endpoints - scrape_interval: 20s tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt # As for kubelets, certificate validation fails for the API server (node) @@ -54,7 +50,6 @@ data: # Scrapes the endpoint lists for the kube-dns server. Which we consider # part of a default setup. 
- job_name: kube-components - scrape_interval: 20s tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -74,3 +69,7 @@ data: - action: keep source_labels: [__meta_kubernetes_endpoint_port_name] regex: "prometheus" +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-k8s diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml index db867d731fc424d5c1014088f163bcca605c5417..3ea2358a1d7bafb9b3112e06e7fb210edb7546b9 100644 --- a/manifests/prometheus/prometheus-k8s-rules.yaml +++ b/manifests/prometheus/prometheus-k8s-rules.yaml @@ -53,6 +53,8 @@ data: \ summary = \"high fsync durations\",\n description = \"ectd instance {{ $labels.instance }} fync durations are high\",\n }\n" kubernetes.rules: |+ + # NOTE: These rules were kindly contributed by the SoundCloud engineering team. + ### Container resources ### cluster_namespace_controller_pod_container:spec_memory_limit_bytes =