diff --git a/README.md b/README.md index aabcb510c009a8c0ccbb4e9f38e81f4c7130ff86..9a8354d3eb54cbb2aee8569d7d698b7bcf205c83 100644 --- a/README.md +++ b/README.md @@ -260,13 +260,13 @@ These are the available fields with their respective default values: namespace: "default", versions+:: { - alertmanager: "v0.15.3", - nodeExporter: "v0.16.0", - kubeStateMetrics: "v1.3.1", - kubeRbacProxy: "v0.3.1", + alertmanager: "v0.16.0", + nodeExporter: "v0.17.0", + kubeStateMetrics: "v1.5.0", + kubeRbacProxy: "v0.4.1", addonResizer: "1.0", - prometheusOperator: "v0.24.0", - prometheus: "v2.4.3", + prometheusOperator: "v0.28.0", + prometheus: "v2.5.0", }, imageRepos+:: { @@ -374,7 +374,7 @@ If your kops cluster is using CoreDNS, there is an additional mixin to import. [embedmd]:# (examples/jsonnet-snippets/kops-coredns.jsonnet) ```jsonnet (import 'kube-prometheus/kube-prometheus.libsonnet') + -(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') ``` diff --git a/examples/jsonnet-snippets/kops-coredns.jsonnet b/examples/jsonnet-snippets/kops-coredns.jsonnet index 6e308579db9160660a537e12028b839321cd7ec4..6ba445dff751e10c8c52e2c7c57dd08eb43cd6e9 100644 --- a/examples/jsonnet-snippets/kops-coredns.jsonnet +++ b/examples/jsonnet-snippets/kops-coredns.jsonnet @@ -1,3 +1,3 @@ (import 'kube-prometheus/kube-prometheus.libsonnet') + -(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') diff --git a/jsonnet/kube-prometheus/jsonnetfile.json b/jsonnet/kube-prometheus/jsonnetfile.json index 652d47a1df3b9bd2d8031a9a6a19b5d968c415c7..7be8827a2d96b2a33b948fc62dd8c0dfe205e655 100644 --- a/jsonnet/kube-prometheus/jsonnetfile.json +++ b/jsonnet/kube-prometheus/jsonnetfile.json @@ -38,7 +38,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "v0.27.0" + "version": "v0.28.0" }, { "name": "etcd-mixin", diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 46794798ca92593fa5dda5636c61a25517418790..31ffed35bba923d33998b1a44b87d87840088265 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" } }, - "version": "d5f758dc5d07b214cd5cdf639847ab0197f42f76" + "version": "9c1ad1e863ddae4ec43f58e260077ec91ea2ae37" }, { "name": "ksonnet", @@ -28,7 +28,7 @@ "subdir": "" } }, - "version": "3b031fd4bb5c3027ab5e76a342758c203b535db0" + "version": "668950e4af13f0153fa1d7b58ebe7023b33f2217" }, { "name": "grafonnet", @@ -48,7 +48,7 @@ "subdir": "grafana-builder" } }, - "version": "eb4d2218cefc621cd31041d46c3dbf328325d68f" + "version": "ec3d4f943df01f517a083305666cd1c87bcc7e94" }, { "name": "grafana", @@ -68,7 +68,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "174c9bf17bec78b055e8e63c6ae3a3dc9bb0a3a8" + "version": "338addbabc8a29b46840df0bb0355c12b96a6f21" }, { "name": "etcd-mixin", @@ -78,7 +78,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "fa521f4e00fedfb6d98449d92a6408d0b3b0d922" + "version": "1fe6f109c87c4fa47775426a6a60c3b954ed5c33" } ] } diff --git a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml index 627ce96d4be312855b1265ef59dc52008562215b..158c5cb380839343448fe70e36f3fe9fd65c0b02 100644 --- a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml +++ b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -1538,6 +1538,14 @@ spec: required: - name type: array + enableAdminAPI: + description: 'Enable access to prometheus web admin API. Defaults to + the value of `false`. WARNING: Enabling the admin APIs enables mutating + endpoints, to delete data, shutdown Prometheus, and more. Enabling + this should be done with care and the user is advised to add additional + authentication authorization via a proxy to ensure only clients authorized + to perform these actions can do so. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis' + type: boolean evaluationInterval: description: Interval between consecutive evaluations. type: string @@ -1572,6 +1580,9 @@ spec: description: ListenLocal makes the Prometheus server listen on loopback, so that it does not bind against the Pod IP. type: boolean + logFormat: + description: Log format for Prometheus to be configured with. + type: string logLevel: description: Log level for Prometheus to be configured with. type: string @@ -2059,6 +2070,11 @@ spec: description: MinBackoff is the initial retry delay. Gets doubled for every retry. type: string + minShards: + description: MinShards is the minimum number of shards, i.e. + amount of concurrency. + format: int32 + type: integer remoteTimeout: description: Timeout for requests to the remote write endpoint. type: string @@ -2243,6 +2259,25 @@ spec: "In", and the values array contains only "value". The requirements are ANDed. type: object + rules: + description: /--rules.*/ command-line arguments + properties: + alert: + description: /--rules.alert.*/ command-line arguments + properties: + forGracePeriod: + description: Minimum duration between alert and restored 'for' + state. This is maintained only for alerts with configured + 'for' time greater than grace period. + type: string + forOutageTolerance: + description: Max time to tolerate prometheus outage for restoring + 'for' state of alert. + type: string + resendDelay: + description: Minimum amount of time to wait before resending + an alert to Alertmanager. + type: string scrapeInterval: description: Interval between consecutive scrapes. type: string @@ -2941,8 +2976,9 @@ spec: description: Thanos base image if other than default. type: string gcs: - description: ThanosGCSSpec defines parameters for use of Google - Cloud Storage (GCS) with Thanos. + description: 'Deprecated: ThanosGCSSpec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosGCSSpec + will be removed.' properties: bucket: description: Google Cloud Storage bucket name for stored blocks. @@ -2970,6 +3006,22 @@ spec: to ensure the Prometheus Operator knows what version of Thanos is being configured. type: string + objectStorageConfig: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be + defined + type: boolean + required: + - key peers: description: Peers is a DNS name for Thanos to discover peers through. type: string @@ -2988,8 +3040,9 @@ spec: to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object s3: - description: ThanosS3Spec defines parameters for of AWS Simple Storage - Service (S3) with Thanos. (S3 compatible services apply as well) + description: 'Deprecated: ThanosS3Spec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosS3Spec + will be removed.' properties: accessKey: description: SecretKeySelector selects a key of a Secret. diff --git a/manifests/0prometheus-operator-clusterRole.yaml b/manifests/0prometheus-operator-clusterRole.yaml index e0ac283a3e479f1aa65ed877196179273c7b2ca7..123f78e93b5dfefe9b261c2a7b8af65d0e942ab2 100644 --- a/manifests/0prometheus-operator-clusterRole.yaml +++ b/manifests/0prometheus-operator-clusterRole.yaml @@ -44,11 +44,13 @@ rules: - "" resources: - services + - services/finalizers - endpoints verbs: - get - create - update + - delete - apiGroups: - "" resources: diff --git a/manifests/0prometheus-operator-deployment.yaml b/manifests/0prometheus-operator-deployment.yaml index 55ad90434f568cbdb3d888d137adf354f9ba8cb9..1f880582d0eaabfaab417a1ee58846497c6fe724 100644 --- a/manifests/0prometheus-operator-deployment.yaml +++ b/manifests/0prometheus-operator-deployment.yaml @@ -20,8 +20,8 @@ spec: - --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 - - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.27.0 - image: quay.io/coreos/prometheus-operator:v0.27.0 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.28.0 + image: quay.io/coreos/prometheus-operator:v0.28.0 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 5729174f815e475654622ce26c255fb084dec00f..443943c01cada85d07b068c502bc9d34fc981fab 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -314,7 +314,7 @@ spec: message: Alertmanager has disappeared from Prometheus target discovery. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown expr: | - absent(up{job="alertmanager-main"} == 1) + absent(up{job="alertmanager-main",namespace="monitoring"} == 1) for: 15m labels: severity: critical @@ -386,7 +386,7 @@ spec: message: Prometheus has disappeared from Prometheus target discovery. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown expr: | - absent(up{job="prometheus-k8s"} == 1) + absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) for: 15m labels: severity: critical @@ -395,7 +395,7 @@ spec: message: PrometheusOperator has disappeared from Prometheus target discovery. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown expr: | - absent(up{job="prometheus-operator"} == 1) + absent(up{job="prometheus-operator",namespace="monitoring"} == 1) for: 15m labels: severity: critical @@ -799,7 +799,7 @@ spec: message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync. expr: | - count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 for: 5m labels: severity: critical @@ -808,7 +808,7 @@ spec: message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}. expr: | - alertmanager_config_last_reload_successful{job="alertmanager-main"} == 0 + alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0 for: 10m labels: severity: warning @@ -816,9 +816,9 @@ spec: annotations: message: Alertmanager has not found all other members of the cluster. expr: | - alertmanager_cluster_members{job="alertmanager-main"} + alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"} != on (service) GROUP_LEFT() - count by (service) (alertmanager_cluster_members{job="alertmanager-main"}) + count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}) for: 5m labels: severity: critical @@ -865,7 +865,7 @@ spec: description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}} summary: Reloading Prometheus' configuration failed expr: | - prometheus_config_last_reload_successful{job="prometheus-k8s"} == 0 + prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"} == 0 for: 10m labels: severity: warning @@ -875,7 +875,7 @@ spec: $labels.pod}} summary: Prometheus' alert notification queue is running full expr: | - predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s"} + predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"} for: 10m labels: severity: warning @@ -885,7 +885,7 @@ spec: $labels.pod}} to Alertmanager {{$labels.Alertmanager}} summary: Errors while sending alert from Prometheus expr: | - rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.01 + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.01 for: 10m labels: severity: warning @@ -895,7 +895,7 @@ spec: $labels.pod}} to Alertmanager {{$labels.Alertmanager}} summary: Errors while sending alerts from Prometheus expr: | - rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.03 + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.03 for: 10m labels: severity: critical @@ -905,7 +905,7 @@ spec: to any Alertmanagers summary: Prometheus is not connected to any Alertmanagers expr: | - prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s"} < 1 + prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"} < 1 for: 10m labels: severity: warning @@ -915,7 +915,7 @@ spec: reload failures over the last four hours.' summary: Prometheus has issues reloading data blocks from disk expr: | - increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s"}[2h]) > 0 + increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0 for: 12h labels: severity: warning @@ -925,7 +925,7 @@ spec: compaction failures over the last four hours.' summary: Prometheus has issues compacting sample blocks expr: | - increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s"}[2h]) > 0 + increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0 for: 12h labels: severity: warning @@ -935,7 +935,7 @@ spec: log (WAL).' summary: Prometheus write-ahead log is corrupted expr: | - tsdb_wal_corruptions_total{job="prometheus-k8s"} > 0 + tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0 for: 4h labels: severity: warning @@ -945,7 +945,7 @@ spec: samples. summary: Prometheus isn't ingesting samples expr: | - rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s"}[5m]) <= 0 + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 for: 10m labels: severity: warning @@ -955,7 +955,7 @@ spec: due to duplicate timestamps but different values' summary: Prometheus has many samples rejected expr: | - increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s"}[5m]) > 0 + increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 for: 10m labels: severity: warning @@ -966,7 +966,7 @@ spec: message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace. expr: | - rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 + rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 for: 10m labels: severity: warning @@ -974,7 +974,7 @@ spec: annotations: message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. expr: | - rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.1 + rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 for: 10m labels: severity: warning