From 926337feacef28c43a079ba4045ed385e7243532 Mon Sep 17 00:00:00 2001 From: Lili Cosic <cosiclili@gmail.com> Date: Thu, 16 Apr 2020 22:03:16 +0200 Subject: [PATCH] manifests: Regenerate --- manifests/grafana-dashboardDefinitions.yaml | 42 ++++++++--------- .../prometheus-operator-serviceMonitor.yaml | 4 +- manifests/prometheus-rules.yaml | 45 ++++++------------- ...-0thanosrulerCustomResourceDefinition.yaml | 5 +++ .../prometheus-operator-clusterRole.yaml | 2 +- ...rometheus-operator-clusterRoleBinding.yaml | 2 +- .../setup/prometheus-operator-deployment.yaml | 8 ++-- .../setup/prometheus-operator-service.yaml | 2 +- .../prometheus-operator-serviceAccount.yaml | 2 +- 9 files changed, 49 insertions(+), 63 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index e5e89252..ca9ea003 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -283,7 +283,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "apiserver_request:availability7d{verb=\"read\"}", + "expr": "apiserver_request:availability30d{verb=\"read\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -654,7 +654,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "apiserver_request:availability7d{verb=\"write\"}", + "expr": "apiserver_request:availability30d{verb=\"write\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -27084,10 +27084,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27175,10 +27175,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", + "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27279,10 +27279,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27387,7 +27387,7 @@ items: "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27478,7 +27478,7 @@ items: "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27569,7 +27569,7 @@ items: "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27660,7 +27660,7 @@ items: "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27764,7 +27764,7 @@ items: "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -27855,7 +27855,7 @@ items: "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -28050,7 +28050,7 @@ items: "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", "refId": "A" } ], @@ -28154,7 +28154,7 @@ items: "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -28245,7 +28245,7 @@ items: "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -28336,7 +28336,7 @@ items: "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -28427,7 +28427,7 @@ items: "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", "refId": "A" } ], @@ -28580,11 +28580,11 @@ items: "includeAll": true, "label": null, "multi": false, - "name": "queue", + "name": "url", "options": [ ], - "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, queue)", + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", "refresh": 2, "regex": "", "sort": 0, diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml index 571f5e25..21400f02 100644 --- a/manifests/prometheus-operator-serviceMonitor.yaml +++ b/manifests/prometheus-operator-serviceMonitor.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator namespace: monitoring spec: @@ -19,4 +19,4 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 79a8e557..34b445e8 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -389,12 +389,15 @@ spec: verb: write record: apiserver_request:availability30d - expr: | - sum by (code) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30d])) + sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read record: code:apiserver_request_total:increase30d - expr: | - sum by (code) (increase(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30d])) + sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: verb: write record: code:apiserver_request_total:increase30d @@ -1311,29 +1314,6 @@ spec: for: 5m labels: severity: warning - - alert: KubeAPILatencyHigh - annotations: - message: The API server has a 99th percentile latency of {{ $value }} seconds - for {{ $labels.verb }} {{ $labels.resource }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh - expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4 - for: 10m - labels: - severity: critical - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{ $value | humanizePercentage - }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource - }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh - expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb) - / - sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10 - for: 10m - labels: - severity: critical - alert: KubeAPIErrorsHigh annotations: message: API server is returning errors for {{ $value | humanizePercentage @@ -1451,7 +1431,7 @@ spec: on node {{ $labels.node }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh expr: | - histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60 + histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: severity: warning @@ -1616,8 +1596,8 @@ spec: - alert: PrometheusRemoteStorageFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send - {{ printf "%.1f" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue - }}{{ else }}{{ $labels.url }}{{ end }}. + {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ + $labels.url }} summary: Prometheus fails to send samples to remote storage. expr: | ( @@ -1637,8 +1617,8 @@ spec: - alert: PrometheusRemoteWriteBehind annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write - is {{ printf "%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue - }}{{ else }}{{ $labels.url }}{{ end }}. + is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url + }}. summary: Prometheus remote write is behind. expr: | # Without max_over_time, failed scrapes could create false negatives, see @@ -1655,8 +1635,9 @@ spec: - alert: PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write - desired shards calculation wants to run {{ $value }} shards, which is more - than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` + desired shards calculation wants to run {{ $value }} shards for queue {{ + $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ + printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}. summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. diff --git a/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml index 8121bbc2..5d630512 100644 --- a/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml @@ -612,6 +612,11 @@ spec: items: type: string type: array + alertQueryUrl: + description: The external Query URL the Thanos Ruler will set in the + 'Source' field of all alerts. Maps to the '--alert.query-url' CLI + arg. + type: string alertmanagersConfig: description: Define configuration for connecting to alertmanager. Only available with thanos v0.10.0 and higher. Maps to the `alertmanagers.config` diff --git a/manifests/setup/prometheus-operator-clusterRole.yaml b/manifests/setup/prometheus-operator-clusterRole.yaml index 054414f2..f42e2fe3 100644 --- a/manifests/setup/prometheus-operator-clusterRole.yaml +++ b/manifests/setup/prometheus-operator-clusterRole.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator rules: - apiGroups: diff --git a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml index 6b634f28..765d3b6f 100644 --- a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml +++ b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/setup/prometheus-operator-deployment.yaml b/manifests/setup/prometheus-operator-deployment.yaml index 2aeec682..e234a1d7 100644 --- a/manifests/setup/prometheus-operator-deployment.yaml +++ b/manifests/setup/prometheus-operator-deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator namespace: monitoring spec: @@ -18,15 +18,15 @@ spec: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 spec: containers: - args: - --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=jimmidyson/configmap-reload:v0.3.0 - - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.0 - image: quay.io/coreos/prometheus-operator:v0.38.0 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.1 + image: quay.io/coreos/prometheus-operator:v0.38.1 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/setup/prometheus-operator-service.yaml b/manifests/setup/prometheus-operator-service.yaml index fbb448d9..1e9fd769 100644 --- a/manifests/setup/prometheus-operator-service.yaml +++ b/manifests/setup/prometheus-operator-service.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator namespace: monitoring spec: diff --git a/manifests/setup/prometheus-operator-serviceAccount.yaml b/manifests/setup/prometheus-operator-serviceAccount.yaml index d4e56d45..bf622bf5 100644 --- a/manifests/setup/prometheus-operator-serviceAccount.yaml +++ b/manifests/setup/prometheus-operator-serviceAccount.yaml @@ -4,6 +4,6 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.38.0 + app.kubernetes.io/version: v0.38.1 name: prometheus-operator namespace: monitoring -- GitLab