diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ee91348fc8ea6f2d2e0f92e526eefc244e5c149e --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +tmp/ +minikube-manifests/ +vendor/ +./auth diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..28a787523aa2a7510b183089c9a5fad4a92e249d --- /dev/null +++ b/Makefile @@ -0,0 +1,65 @@ +JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s + +JB_BINARY:=$(GOPATH)/bin/jb +EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd + +all: generate fmt test + +../../hack/jsonnet-docker-image: ../../scripts/jsonnet/Dockerfile +# Create empty target file, for the sole purpose of recording when this target +# was last executed via the last-modification timestamp on the file. See +# https://www.gnu.org/software/make/manual/make.html#Empty-Targets + docker build -f - -t po-jsonnet . < ../../scripts/jsonnet/Dockerfile + touch $@ + +generate-in-docker: ../../hack/jsonnet-docker-image + @echo ">> Compiling assets and generating Kubernetes manifests" + docker run \ + --rm \ + -u=$(shell id -u $(USER)):$(shell id -g $(USER)) \ + -v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \ + -v $(shell go env GOCACHE):/.cache/go-build \ + --workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \ + po-jsonnet make generate + +generate: manifests **.md + +**.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet + $(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor` + +manifests: vendor example.jsonnet build.sh + rm -rf manifests + ./build.sh ./examples/kustomize.jsonnet + +vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json + rm -rf vendor + $(JB_BINARY) install + +fmt: + find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \ + xargs -n 1 -- $(JSONNET_FMT) -i + +test: $(JB_BINARY) + $(JB_BINARY) install + ./test.sh + +test-e2e: + go test -timeout 55m -v ./tests/e2e -count=1 + +test-in-docker: ../../hack/jsonnet-docker-image + @echo ">> Compiling assets and generating Kubernetes manifests" + docker run \ + --rm \ + -u=$(shell id -u $(USER)):$(shell id -g $(USER)) \ + -v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \ + -v $(shell go env GOCACHE):/.cache/go-build \ + --workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \ + po-jsonnet make test + +$(JB_BINARY): + go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb + +$(EMBEDMD_BINARY): + go get github.com/campoy/embedmd + +.PHONY: generate generate-in-docker test test-in-docker fmt diff --git a/README.md b/README.md index bc24e666b32bca94121e058ebcf2a8facd75b7ed..5b978f71193c64b8772bbd3d5e166cb4d0b4fb79 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,633 @@ # kube-prometheus -This repository collects Kubernetes manifests, dashboards, and alerting rules -combined with documentation and scripts to provide single-command deployments -of end-to-end Kubernetes cluster monitoring. +> Note that everything in the `contrib/kube-prometheus/` directory is experimental and may change significantly at any time. -# This repository has moved +This repository collects Kubernetes manifests, [Grafana](http://grafana.com/) dashboards, and [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with [Prometheus](https://prometheus.io/) using the Prometheus Operator. -This repository has been merged with the [Prometheus Operator](https://github.com/coreos/prometheus-operator). It can now be found under [`contrib/kube-prometheus`](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus). +The content of this project is written in [jsonnet](http://jsonnet.org/). This project could both be described as a package as well as a library. + +Components included in this package: + +* The [Prometheus Operator](https://github.com/coreos/prometheus-operator) +* Highly available [Prometheus](https://prometheus.io/) +* Highly available [Alertmanager](https://github.com/prometheus/alertmanager) +* [Prometheus node-exporter](https://github.com/prometheus/node_exporter) +* [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) +* [Grafana](https://grafana.com/) + +This stack is meant for cluster monitoring, so it is pre-configured to collect metrics from all Kubernetes components. In addition to that it delivers a default set of dashboards and alerting rules. Many of the useful dashboards and alerts come from the [kubernetes-mixin project](https://github.com/kubernetes-monitoring/kubernetes-mixin), similar to this project it provides composable jsonnet as a library for users to customize to their needs. + +## Table of contents + +* [Prerequisites](#prerequisites) + * [minikube](#minikube) +* [Quickstart](#quickstart) +* [Customizing Kube-Prometheus](#customizing-kube-prometheus) + * [Installing](#installing) + * [Compiling](#compiling) + * [Containerized Installing and Compiling](#containerized-installing-and-compiling) +* [Configuration](#configuration) +* [Customization Examples](#customization-examples) + * [Cluster Creation Tools](#cluster-creation-tools) + * [Internal Registries](#internal-registries) + * [NodePorts](#nodeports) + * [Prometheus Object Name](#prometheus-object-name) + * [node-exporter DaemonSet namespace](#node-exporter-daemonset-namespace) + * [Alertmanager configuration](#alertmanager-configuration) + * [Static etcd configuration](#static-etcd-configuration) + * [Pod Anti-Affinity](#pod-anti-affinity) + * [Customizing Prometheus alerting/recording rules and Grafana dashboards](#customizing-prometheus-alertingrecording-rules-and-grafana-dashboards) + * [Exposing Prometheus/Alermanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress) +* [Minikube Example](#minikube-example) +* [Troubleshooting](#troubleshooting) + * [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics) + * [kube-state-metrics resource usage](#kube-state-metrics-resource-usage) +* [Contributing](#contributing) + +## Prerequisites + +You will need a Kubernetes cluster, that's it! By default it is assumed, that the kubelet uses token authentication and authorization, as otherwise Prometheus needs a client certificate, which gives it full access to the kubelet, rather than just the metrics. Token authentication and authorization allows more fine grained and easier access control. + +This means the kubelet configuration must contain these flags: + +* `--authentication-token-webhook=true` This flag enables, that a `ServiceAccount` token can be used to authenticate against the kubelet(s). +* `--authorization-mode=Webhook` This flag enables, that the kubelet will perform an RBAC request with the API to determine, whether the requesting entity (Prometheus in this case) is allow to access a resource, in specific for this project the `/metrics` endpoint. + +This stack provides [resource metrics](https://github.com/kubernetes/metrics#resource-metrics-api) by deploying the [Prometheus Adapter](https://github.com/DirectXMan12/k8s-prometheus-adapter/). +This adapter is an Extension API Server and Kubernetes needs to be have this feature enabled, otherwise the adapter has no effect, but is still deployed. + +### minikube + +In order to just try out this stack, start minikube with the following command: + +``` +$ minikube delete && minikube start --kubernetes-version=v1.13.2 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0 +``` + +> The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube: +> +> ``` +> minikube addons disable metrics-server +> ``` + +## Quickstart + +This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository). + +Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run: + * Simply create the stack: +``` +$ kubectl create -f manifests/ + +# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding. +$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done +$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done + +$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition). +``` + + * And to teardown the stack: +``` +$ kubectl delete -f manifests/ +``` + +### Access the dashboards + +Prometheus, Grafana, and Alertmanager dashboards can be accessed quickly using `kubectl port-forward` after running the quickstart via the commands below. Kubernetes 1.10 or later is required. + +> Note: There are instructions on how to route to these pods behind an ingress controller in the [Exposing Prometheus/Alermanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress) section. + +Prometheus + +```shell +kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090 +``` + +Then access via [http://localhost:9090](http://localhost:9090) + +Grafana + +```shell +kubectl --namespace monitoring port-forward svc/grafana 3000 +``` + +Then access via [http://localhost:3000](http://localhost:3000) and use the default grafana user:password of `admin:admin`. + +Alert Manager + +```shell +kubectl --namespace monitoring port-forward svc/alertmanager-main 9093 +``` + +Then access via [http://localhost:9093](http://localhost:9093) + +## Customizing Kube-Prometheus + +This section: + * describes how to customize the kube-prometheus library via compiling the kube-prometheus manifests yourself (as an alternative to the [Quickstart section](#Quickstart)). + * still doesn't require you to make a copy of this entire repository, but rather only a copy of a few select files. + +### Installing + +The content of this project consists of a set of [jsonnet](http://jsonnet.org/) files making up a library to be consumed. + +Install this library in your own project with [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler#install) (the jsonnet package manager): +``` +$ mkdir my-kube-prometheus; cd my-kube-prometheus +$ jb init # Creates the initial/empty `jsonnetfile.json` +# Install the kube-prometheus dependency +$ jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus # Creates `vendor/` & `jsonnetfile.lock.json`, and fills in `jsonnetfile.json` +``` + +> `jb` can be installed with `go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb` + +> An e.g. of how to install a given version of this library: `jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus/@v0.22.0` + +In order to update the kube-prometheus dependency, simply use the jsonnet-bundler update functionality: +`$ jb update` + +### Compiling + +e.g. of how to compile the manifests: `./build.sh example.jsonnet` + +> before compiling, install `gojsontoyaml` tool with `go get github.com/brancz/gojsontoyaml` + +Here's [example.jsonnet](example.jsonnet): + +[embedmd]:# (example.jsonnet) +```jsonnet +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + }; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +``` + +And here's the [build.sh](build.sh) script (which uses `vendor/` to render all manifests in a json structure of `{filename: manifest-content}`): + +[embedmd]:# (build.sh) +```sh +#!/usr/bin/env bash + +# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files. + +set -e +set -x +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail + +# Make sure to start with a clean 'manifests' dir +rm -rf manifests +mkdir manifests + + # optional, but we would like to generate yaml, not json +jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {} + +``` + +> Note you need `jsonnet` (`go get github.com/google/go-jsonnet/cmd/jsonnet`) and `gojsontoyaml` (`go get github.com/brancz/gojsontoyaml`) installed to run `build.sh`. If you just want json output, not yaml, then you can skip the pipe and everything afterwards. + +This script runs the jsonnet code, then reads each key of the generated json and uses that as the file name, and writes the value of that key to that file, and converts each json manifest to yaml. + +### Apply the kube-prometheus stack +The previous steps (compilation) has created a bunch of manifest files in the manifest/ folder. +Now simply use kubectl to install Prometheus and Grafana as per your configuration: + +`kubectl apply -f manifests/` + +Check the monitoring namespace (or the namespace you have specific in `namespace: `) and make sure the pods are running. Prometheus and Grafana should be up and running soon. + +### Containerized Installing and Compiling + +If you don't care to have `jb` nor `jsonnet` nor `gojsontoyaml` installed, then build the `po-jsonnet` Docker image (this is something you'll need a copy of this repository for). Do the following from this `kube-prometheus` directory: +``` +$ make ../../hack/jsonnet-docker-image +``` + +Then you can do commands such as the following: +``` +docker run \ + --rm \ + -v `pwd`:`pwd` \ + --workdir `pwd` \ + po-jsonnet jb init + +docker run \ + --rm \ + -v `pwd`:`pwd` \ + --workdir `pwd` \ + po-jsonnet jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus + +docker run \ + --rm \ + -v `pwd`:`pwd` \ + --workdir `pwd` \ + po-jsonnet ./build.sh example.jsonnet +``` + +## Update from upstream project +You may wish to fetch changes made on this project so they are available to you. + +### Update jb +jb may have been updated so it's a good idea to get the latest version of this binary + +``` +go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb +``` + +### Update kube-prometheus +The command below will sync with upstream project. +``` +jb update +``` + +### Compile the manifests and apply +Once updated, just follow the instructions under "Compiling" and "Apply the kube-prometheus stack" to apply the changes to your cluster. + + +## Configuration + +Jsonnet has the concept of hidden fields. These are fields, that are not going to be rendered in a result. This is used to configure the kube-prometheus components in jsonnet. In the example jsonnet code of the above [Usage section](#Usage), you can see an example of this, where the `namespace` is being configured to be `monitoring`. In order to not override the whole object, use the `+::` construct of jsonnet, to merge objects, this way you can override individual settings, but retain all other settings and defaults. + +These are the available fields with their respective default values: +``` +{ + _config+:: { + namespace: "default", + + versions+:: { + alertmanager: "v0.16.1", + nodeExporter: "v0.17.0", + kubeStateMetrics: "v1.5.0", + kubeRbacProxy: "v0.4.1", + addonResizer: "1.8.4", + prometheusOperator: "v0.29.0", + prometheus: "v2.5.0", + }, + + imageRepos+:: { + prometheus: "quay.io/prometheus/prometheus", + alertmanager: "quay.io/prometheus/alertmanager", + kubeStateMetrics: "quay.io/coreos/kube-state-metrics", + kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy", + addonResizer: "k8s.gcr.io/addon-resizer", + nodeExporter: "quay.io/prometheus/node-exporter", + prometheusOperator: "quay.io/coreos/prometheus-operator", + }, + + prometheus+:: { + names: 'k8s', + replicas: 2, + rules: {}, + }, + + alertmanager+:: { + name: 'main', + config: ||| + global: + resolve_timeout: 5m + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - match: + alertname: Watchdog + receiver: 'null' + receivers: + - name: 'null' + |||, + replicas: 3, + }, + + kubeStateMetrics+:: { + collectors: '', // empty string gets a default set + scrapeInterval: '30s', + scrapeTimeout: '30s', + + baseCPU: '100m', + baseMemory: '150Mi', + cpuPerNode: '2m', + memoryPerNode: '30Mi', + }, + + nodeExporter+:: { + port: 9100, + }, + }, +} +``` + +The grafana definition is located in a different project (https://github.com/brancz/kubernetes-grafana), but needed configuration can be customized from the same top level `_config` field. For example to allow anonymous access to grafana, add the following `_config` section: +``` + grafana+:: { + config: { // http://docs.grafana.org/installation/configuration/ + sections: { + "auth.anonymous": {enabled: true}, + }, + }, + }, +``` + +## Customization Examples + +Jsonnet is a turing complete language, any logic can be reflected in it. It also has powerful merge functionalities, allowing sophisticated customizations of any kind simply by merging it into the object the library provides. + +### Cluster Creation Tools + +A common example is that not all Kubernetes clusters are created exactly the same way, meaning the configuration to monitor them may be slightly different. For [kubeadm](examples/jsonnet-snippets/kubeadm.jsonnet), [bootkube](examples/jsonnet-snippets/bootkube.jsonnet), [kops](examples/jsonnet-snippets/kops.jsonnet) and [kubespray](examples/jsonnet-snippets/kubespray.jsonnet) clusters there are mixins available to easily configure these: + +kubeadm: + +[embedmd]:# (examples/jsonnet-snippets/kubeadm.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') +``` + +bootkube: + +[embedmd]:# (examples/jsonnet-snippets/bootkube.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') +``` + +kops: + +[embedmd]:# (examples/jsonnet-snippets/kops.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') +``` + +kops with CoreDNS: + +If your kops cluster is using CoreDNS, there is an additional mixin to import. + +[embedmd]:# (examples/jsonnet-snippets/kops-coredns.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') +``` + +kubespray: + +[embedmd]:# (examples/jsonnet-snippets/kubespray.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet') +``` + +kube-aws: + +[embedmd]:# (examples/jsonnet-snippets/kube-aws.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet') +``` + +### Internal Registry + +Some Kubernetes installations source all their images from an internal registry. kube-prometheus supports this use case and helps the user synchronize every image it uses to the internal registry and generate manifests pointing at the internal registry. + +To produce the `docker pull/tag/push` commands that will synchronize upstream images to `internal-registry.com/organization` (after having run the `jb` command to populate the vendor directory): + +```shell +$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet +docker pull k8s.gcr.io/addon-resizer:1.8.4 +docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4 +docker push internal-registry.com/organization/addon-resizer:1.8.4 +docker pull quay.io/prometheus/alertmanager:v0.16.1 +docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1 +docker push internal-registry.com/organization/alertmanager:v0.16.1 +... +``` + +The output of this command can be piped to a shell to be executed by appending `| sh`. + +Then to generate manifests with `internal-registry.com/organization`, use the `withImageRepository` mixin: + +[embedmd]:# (examples/internal-registry.jsonnet) +```jsonnet +local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet'; +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, +} + mixin.withImageRepository('internal-registry.com/organization'); + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` + +### NodePorts + +Another mixin that may be useful for exploring the stack is to expose the UIs of Prometheus, Alertmanager and Grafana on NodePorts: + +[embedmd]:# (examples/jsonnet-snippets/node-ports.jsonnet) +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +``` + +### Prometheus Object Name + +To give another customization example, the name of the `Prometheus` object provided by this library can be overridden: + +[embedmd]:# (examples/prometheus-name-override.jsonnet) +```jsonnet +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + prometheus+: { + prometheus+: { + metadata+: { + name: 'my-name', + }, + }, + }, + }).prometheus.prometheus +``` + +### node-exporter DaemonSet namespace + +Standard Kubernetes manifests are all written using [ksonnet-lib](https://github.com/ksonnet/ksonnet-lib/), so they can be modified with the mixins supplied by ksonnet-lib. For example to override the namespace of the node-exporter DaemonSet: + +[embedmd]:# (examples/ksonnet-example.jsonnet) +```jsonnet +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local daemonset = k.apps.v1beta2.daemonSet; + +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + nodeExporter+: { + daemonset+: + daemonset.mixin.metadata.withNamespace('my-custom-namespace'), + }, + }).nodeExporter.daemonset +``` + +### Alertmanager configuration + +The Alertmanager configuration is located in the `_config.alertmanager.config` configuration field. In order to set a custom Alertmanager configuration simply set this field. + +[embedmd]:# (examples/alertmanager-config.jsonnet) +```jsonnet +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + alertmanager+: { + config: ||| + global: + resolve_timeout: 10m + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - match: + alertname: Watchdog + receiver: 'null' + receivers: + - name: 'null' + |||, + }, + }, + }).alertmanager.secret +``` + +In the above example the configuration has been inlined, but can just as well be an external file imported in jsonnet via the `importstr` function. + +[embedmd]:# (examples/alertmanager-config-external.jsonnet) +```jsonnet +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + alertmanager+: { + config: importstr 'alertmanager-config.yaml', + }, + }, + }).alertmanager.secret +``` + +### Adding additional namespaces to monitor + +In order to monitor additional namespaces, the Prometheus server requires the appropriate `Role` and `RoleBinding` to be able to discover targets from that namespace. By default the Prometheus server is limited to the three namespaces it requires: default, kube-system and the namespace you configure the stack to run in via `$._config.namespace`. This is specified in `$._config.prometheus.namespaces`, to add new namespaces to monitor, simply append the additional namespaces: + +[embedmd]:# (examples/additional-namespaces.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + prometheus+:: { + namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` + +### Static etcd configuration + +In order to configure a static etcd cluster to scrape there is a simple [kube-prometheus-static-etcd.libsonnet](jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) mixin prepared - see [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information. + +> Note that monitoring etcd in minikube is currently not possible because of how etcd is setup. (minikube's etcd binds to 127.0.0.1:2379 only, and within host networking namespace.) + +### Pod Anti-Affinity + +To prevent `Prometheus` and `Alertmanager` instances from being deployed onto the same node when +possible, one can include the [kube-prometheus-anti-affinity.libsonnet](jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet) mixin: + +```jsonnet +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +``` + +### Customizing Prometheus alerting/recording rules and Grafana dashboards + +See [developing Prometheus rules and Grafana dashboards](docs/developing-prometheus-rules-and-grafana-dashboards.md) guide. + +### Exposing Prometheus/Alermanager/Grafana via Ingress + +See [exposing Prometheus/Alertmanager/Grafana](docs/exposing-prometheus-alertmanager-grafana-ingress.md) guide. + +## Minikube Example + +To use an easy to reproduce example, see [minikube.jsonnet](examples/minikube.jsonnet), which uses the minikube setup as demonstrated in [Prerequisites](#prerequisites). Because we would like easy access to our Prometheus, Alertmanager and Grafana UIs, `minikube.jsonnet` exposes the services as NodePort type services. + +## Troubleshooting + +### Error retrieving kubelet metrics + +Should the Prometheus `/targets` page show kubelet targets, but not able to successfully scrape the metrics, then most likely it is a problem with the authentication and authorization setup of the kubelets. + +As described in the [Prerequisites](#prerequisites) section, in order to retrieve metrics from the kubelet token authentication and authorization must be enabled. Some Kubernetes setup tools do not enable this by default. + +If you are using Google's GKE product, see [cAdvisor support](docs/GKE-cadvisor-support.md). + +#### Authentication problem + +The Prometheus `/targets` page will show the kubelet job with the error `403 Unauthorized`, when token authentication is not enabled. Ensure, that the `--authentication-token-webhook=true` flag is enabled on all kubelet configurations. + +#### Authorization problem + +The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations. + +### kube-state-metrics resource usage + +In some environments, kube-state-metrics may need additional +resources. One driver for more resource needs, is a high number of +namespaces. There may be others. + +kube-state-metrics resource allocation is managed by +[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny) +You can control it's parameters by setting variables in the +config. They default to: + +``` jsonnet + kubeStateMetrics+:: { + baseCPU: '100m', + cpuPerNode: '2m', + baseMemory: '150Mi', + memoryPerNode: '30Mi', + } +``` + +## Contributing + +All `.yaml` files in the `/manifests` folder are generated via +[Jsonnet](https://jsonnet.org/). Contributing changes will most likely include +the following process: + +1. Make your changes in the respective `*.jsonnet` file. +2. Commit your changes (This is currently necessary due to our vendoring + process. This is likely to change in the future). +3. Update the pinned kube-prometheus dependency in `jsonnetfile.lock.json`: `jb + update`. +3. Generate dependent `*.yaml` files: `make generate-in-docker`. +4. Commit the generated changes. diff --git a/build.sh b/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..f68cd4475df21548ce6f7b26eff574929f3043e7 --- /dev/null +++ b/build.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files. + +set -e +set -x +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail + +# Make sure to start with a clean 'manifests' dir +rm -rf manifests +mkdir manifests + + # optional, but we would like to generate yaml, not json +jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {} + diff --git a/docs/GKE-cadvisor-support.md b/docs/GKE-cadvisor-support.md new file mode 100644 index 0000000000000000000000000000000000000000..0d763ac0eb2d774b0684e0f1684ec9053d5d6d9d --- /dev/null +++ b/docs/GKE-cadvisor-support.md @@ -0,0 +1,36 @@ +# Kubelet / cAdvisor special configuration updates for GKE + +Prior to GKE 1.11, the kubelet does not support token +authentication. Until it does, Prometheus must use HTTP (not HTTPS) +for scraping. + +You can configure this behavior through kube-prometheus with: +``` +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + + (import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') + + { + _config+:: { + # ... config here + } + }; +``` + +Or, you can patch and re-apply your existing manifests with: + +On linux: + +``` +sed -i -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml +``` + +On MacOs: + +``` +sed -i '' -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml +``` + +After you have modified the yaml file please run + +``` +kubectl apply -f manifests/prometheus-serviceMonitorKubelet.yaml +``` diff --git a/docs/developing-prometheus-rules-and-grafana-dashboards.md b/docs/developing-prometheus-rules-and-grafana-dashboards.md new file mode 100644 index 0000000000000000000000000000000000000000..17838b829b44200af1d555523934eeba44260b05 --- /dev/null +++ b/docs/developing-prometheus-rules-and-grafana-dashboards.md @@ -0,0 +1,293 @@ +# Developing Prometheus Rules and Grafana Dashboards + +`kube-prometheus` ships with a set of default [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [Grafana](http://grafana.com/) dashboards. At some point one might like to extend them, the purpose of this document is to explain how to do this. + +All manifests of kube-prometheus are generated using [jsonnet](https://jsonnet.org/) and Prometheus rules and Grafana dashboards in specific follow the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/). + +For both the Prometheus rules and the Grafana dashboards Kubernetes `ConfigMap`s are generated within kube-prometheus. In order to add additional rules and dashboards simply merge them onto the existing json objects. This document illustrates examples for rules as well as dashboards. + +As a basis, all examples in this guide are based on the base example of the kube-prometheus [readme](../README.md): + +[embedmd]:# (../example.jsonnet) +```jsonnet +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + }; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +``` + +## Prometheus rules + +### Alerting rules + +According to the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/) Prometheus alerting rules are under the key `prometheusAlerts` in the top level object, so in order to add an additional alerting rule, we can simply merge an extra rule into the existing object. + +The format is exactly the Prometheus format, so there should be no changes necessary should you have existing rules that you want to include. + +> Note that alerts can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file. + +[embedmd]:# (../examples/prometheus-additional-alert-rule-example.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + prometheusAlerts+:: { + groups+: [ + { + name: 'example-group', + rules: [ + { + alert: 'Watchdog', + expr: 'vector(1)', + labels: { + severity: 'none', + }, + annotations: { + description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.', + }, + }, + ], + }, + ], + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` + +### Recording rules + +In order to add a recording rule, simply do the same with the `prometheusRules` field. + +> Note that rules can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file. + +[embedmd]:# (../examples/prometheus-additional-recording-rule-example.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + prometheusRules+:: { + groups+: [ + { + name: 'example-group', + rules: [ + { + record: 'some_recording_rule_name', + expr: 'vector(1)', + }, + ], + }, + ], + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` + +### Pre-rendered rules + +We acknowledge, that users may need to transition existing rules, and therefore allow an option to add additional pre-rendered rules. Luckily the yaml and json formats are very close so the yaml rules just need to be converted to json without any manual interaction needed. Just a tool to convert yaml to json is needed: + +``` +go get -u -v github.com/brancz/gojsontoyaml +``` + +And convert the existing rule file: + +``` +cat existingrule.yaml | gojsontoyaml -yamltojson > existingrule.json +``` + +Then import it in jsonnet: + +[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + prometheusAlerts+:: (import 'existingrule.json'), +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` +### Changing default rules + +Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/kube-prometheus.libsonnet`. The recording rules can be found in [kube-prometheus/rules](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/alerts](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts). + +Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above. + +#### Filter +Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). +```jsonnet +local filter = { + prometheusAlerts+:: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.filter(function(rule) + rule.alert != "KubeStatefulSetReplicasMismatch", + group.rules + ) + } + else + group, + super.groups + ), + }, +}; +``` +#### Adjustment +Here the expression for the alert used above is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). +```jsonnet +local update = { + prometheusAlerts+:: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.map( + function(rule) + if rule.alert == "KubeStatefulSetReplicasMismatch" then + rule { + expr: "kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",statefulset!=\"vault\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\",statefulset!=\"vault\"}" + } + else + rule, + group.rules + ) + } + else + group, + super.groups + ), + }, +}; +``` +Using the example from above about adding in pre-rendered rules, the new local vaiables can be added in as follows: +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + filter + update + { + prometheusAlerts+:: (import 'existingrule.json'), +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` +## Dashboards + +Dashboards can either be added using jsonnet or simply a pre-rendered json dashboard. + +### Jsonnet dashboard + +We recommend using the [grafonnet](https://github.com/grafana/grafonnet-lib/) library for jsonnet, which gives you a simple DSL to generate Grafana dashboards. Following the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/) additional dashboards are added to the `grafanaDashboards` key, located in the top level object. To add new jsonnet dashboards, simply add one. + +> Note that dashboards can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file. + +[embedmd]:# (../examples/grafana-additional-jsonnet-dashboard-example.jsonnet) +```jsonnet +local grafana = import 'grafonnet/grafana.libsonnet'; +local dashboard = grafana.dashboard; +local row = grafana.row; +local prometheus = grafana.prometheus; +local template = grafana.template; +local graphPanel = grafana.graphPanel; + +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + grafanaDashboards+:: { + 'my-dashboard.json': + dashboard.new('My Dashboard') + .addTemplate( + { + current: { + text: 'Prometheus', + value: 'Prometheus', + }, + hide: 0, + label: null, + name: 'datasource', + options: [], + query: 'prometheus', + refresh: 1, + regex: '', + type: 'datasource', + }, + ) + .addRow( + row.new() + .addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource') + .addTarget(prometheus.target('vector(1)'))) + ), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` + +### Pre-rendered Grafana dashboards + +As jsonnet is a superset of json, the jsonnet `import` function can be used to include Grafana dashboard json blobs. In this example we are importing a [provided example dashboard](../examples/example-grafana-dashboard.json). + +[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + grafanaDashboards+:: { + 'my-dashboard.json': (import 'example-grafana-dashboard.json'), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` diff --git a/docs/exposing-prometheus-alertmanager-grafana-ingress.md b/docs/exposing-prometheus-alertmanager-grafana-ingress.md new file mode 100644 index 0000000000000000000000000000000000000000..7fb2d4f6a8d4c8cb096f9a8978c48962b46b58b3 --- /dev/null +++ b/docs/exposing-prometheus-alertmanager-grafana-ingress.md @@ -0,0 +1,101 @@ +# Exposing Prometheus, Alertmanager and Grafana UIs via Ingress + +In order to access the web interfaces via the Internet [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains, how Kubernetes Ingress can be setup, in order to expose the Prometheus, Alertmanager and Grafana UIs, that are included in the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) project. + +Note: before continuing, it is recommended to first get familiar with the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack by itself. + +## Prerequisites + +Apart from a running Kubernetes cluster with a running [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack, a Kubernetes Ingress controller must be installed and functional. This guide was tested with the [nginx-ingress-controller](https://github.com/kubernetes/ingress-nginx). If you wish to reproduce the exact result in as depicted in this guide we recommend using the nginx-ingress-controller. + +## Setting up Ingress + +The setup of Ingress objects is the same for Prometheus, Alertmanager and Grafana. Therefore this guides demonstrates it in detail for Prometheus as it can easily be adapted for the other applications. + +As monitoring data may contain sensitive data, this guide describes how to setup Ingress with basic auth as an example of minimal security. Of course this should be adapted to the preferred authentication mean of any particular organization, but we feel it is important to at least provide an example with a minimum of security. + +In order to setup basic auth, a secret with the `htpasswd` formatted file needs to be created. To do this, first install the [`htpasswd`](https://httpd.apache.org/docs/2.4/programs/htpasswd.html) tool. + +To create the `htpasswd` formatted file called `auth` run: + +``` +htpasswd -c auth <username> +``` + +In order to use this a secret needs to be created containing the name of the `htpasswd`, and with annotations on the Ingress object basic auth can be configured. + +Also, the applications provide external links to themselves in alerts and various places. When an ingress is used in front of the applications these links need to be based on the external URL's. This can be configured for each application in jsonnet. + +```jsonnet +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local secret = k.core.v1.secret; +local ingress = k.extensions.v1beta1.ingress; +local ingressTls = ingress.mixin.spec.tlsType; +local ingressRule = ingress.mixin.spec.rulesType; +local httpIngressPath = ingressRule.mixin.http.pathsType; + +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + + { + _config+:: { + namespace: 'monitoring', + }, + prometheus+:: { + prometheus+: { + spec+: { + externalUrl: 'http://prometheus.example.com', + }, + }, + }, + ingress+:: { + 'prometheus-k8s': + ingress.new() + + ingress.mixin.metadata.withName($.prometheus.prometheus.metadata.name) + + ingress.mixin.metadata.withNamespace($.prometheus.prometheus.metadata.namespace) + + ingress.mixin.metadata.withAnnotations({ + 'nginx.ingress.kubernetes.io/auth-type': 'basic', + 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', + 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', + }) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost('prometheus.example.com') + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.mixin.backend.withServiceName($.prometheus.service.metadata.name) + + httpIngressPath.mixin.backend.withServicePort('web') + ), + ), + }, + } + { + ingress+:: { + 'basic-auth-secret': + secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + + secret.mixin.metadata.withNamespace($._config.namespace), + }, + }; + +k.core.v1.list.new([ + kp.ingress['prometheus-k8s'], + kp.ingress['basic-auth-secret'], +]) +``` + +In order to expose Alertmanager and Grafana, simply create additional fields containing an ingress object, but simply pointing at the `alertmanager` or `grafana` instead of the `prometheus-k8s` Service. Make sure to also use the correct port respectively, for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. + +In order to render the ingress objects similar to the other objects use as demonstrated in the [main readme](../README.md#usage): + +``` +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +{ ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) } +``` + +Note, that in comparison only the last line was added, the rest is identical to the original. + +See [ingress.jsonnet](../examples/ingress.jsonnet) for an example implementation. diff --git a/docs/kube-prometheus-on-kubeadm.md b/docs/kube-prometheus-on-kubeadm.md new file mode 100644 index 0000000000000000000000000000000000000000..d0101fea406ddfb057eafd8887bd5875938dbb09 --- /dev/null +++ b/docs/kube-prometheus-on-kubeadm.md @@ -0,0 +1,159 @@ +<br> +<div class="alert alert-info" role="alert"> + <i class="fa fa-exclamation-triangle"></i><b> Note:</b> Starting with v0.12.0, Prometheus Operator requires use of Kubernetes v1.7.x and up. +</div> + +# Kube Prometheus on Kubeadm + +The [kubeadm](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) tool is linked by Kubernetes as the offical way to deploy and manage self-hosted clusters. Kubeadm does a lot of heavy lifting by automatically configuring your Kubernetes cluster with some common options. This guide is intended to show you how to deploy Prometheus, Prometheus Operator and Kube Prometheus to get you started monitoring your cluster that was deployed with Kubeadm. + +This guide assumes you have a basic understanding of how to use the functionality the Prometheus Operator implements. If you haven't yet, we recommend reading through the [getting started guide](../../../Documentation/user-guides/getting-started.md) as well as the [alerting guide](../../../Documentation/user-guides/alerting.md). + +## Kubeadm Pre-requisites + +This guide assumes you have some familiarity with `kubeadm` or at least have deployed a cluster using `kubeadm`. By default, `kubeadm` does not expose two of the services that we will be monitoring. Therefore, in order to get the most out of the `kube-prometheus` package, we need to make some quick tweaks to the Kubernetes cluster. Since we will be monitoring the `kube-controller-manager` and `kube-scheduler`, we must expose them to the cluster. + +By default, `kubeadm` runs these pods on your master and bound to `127.0.0.1`. There are a couple of ways to change this. The recommended way to change these features is to use the [kubeadm config file](https://kubernetes.io/docs/reference/generated/kubeadm/#config-file). An example configuration file can be used: + +```yaml +apiVersion: kubeadm.k8s.io/v1alpha1 +kind: MasterConfiguration +api: + advertiseAddress: 192.168.1.173 + bindPort: 6443 +authorizationModes: +- Node +- RBAC +certificatesDir: /etc/kubernetes/pki +cloudProvider: +etcd: + dataDir: /var/lib/etcd + endpoints: null +imageRepository: gcr.io/google_containers +kubernetesVersion: v1.8.3 +networking: + dnsDomain: cluster.local + serviceSubnet: 10.96.0.0/12 +nodeName: your-dev +tokenTTL: 24h0m0s +controllerManagerExtraArgs: + address: 0.0.0.0 +schedulerExtraArgs: + address: 0.0.0.0 +``` + +Notice the `schedulerExtraArgs` and `controllerManagerExtraArgs`. This exposes the `kube-controller-manager` and `kube-scheduler` services to the rest of the cluster. If you have kubernetes core components as pods in the kube-system namespace, ensure that the `kube-prometheus-exporter-kube-scheduler` and `kube-prometheus-exporter-kube-controller-manager` services' `spec.selector` values match those of pods. + +In addition, we will be using `node-exporter` to monitor the `cAdvisor` service on all the nodes. This, however requires a change to the `kubelet` service on the master as well as all the nodes. According to the Kubernetes documentation + +> The kubeadm deb package ships with configuration for how the kubelet should be run. Note that the `kubeadm` CLI command will never touch this drop-in file. This drop-in file belongs to the kubeadm deb/rpm package. + +Again, we need to expose the `cadvisor` that is installed and managed by the `kubelet` daemon and allow webhook token authentication. To do so, we do the following on all the masters and nodes: + +```bash +KUBEADM_SYSTEMD_CONF=/etc/systemd/system/kubelet.service.d/10-kubeadm.conf +sed -e "/cadvisor-port=0/d" -i "$KUBEADM_SYSTEMD_CONF" +if ! grep -q "authentication-token-webhook=true" "$KUBEADM_SYSTEMD_CONF"; then + sed -e "s/--authorization-mode=Webhook/--authentication-token-webhook=true --authorization-mode=Webhook/" -i "$KUBEADM_SYSTEMD_CONF" +fi +systemctl daemon-reload +systemctl restart kubelet +``` + +In case you already have a Kubernetes deployed with kubeadm, change the address kube-controller-manager and kube-scheduler listens in addition to previous kubelet change: + +``` +sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-controller-manager.yaml +sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-scheduler.yaml +``` + +With these changes, your Kubernetes cluster is ready. + +## Metric Sources + +Monitoring a Kubernetes cluster with Prometheus is a natural choice as Kubernetes components themselves are instrumented with Prometheus metrics, therefore those components simply have to be discovered by Prometheus and most of the cluster is monitored. + +Metrics that are rather about cluster state than a single component's metrics is exposed by the add-on component [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). + +Additionally, to have an overview of cluster nodes' resources the Prometheus [node_exporter](https://github.com/prometheus/node_exporter) is used. The node_exporter allows monitoring a node's resources: CPU, memory and disk utilization and more. + +Once you complete this guide you will monitor the following: + +* cluster state via kube-state-metrics +* nodes via the node_exporter +* kubelets +* apiserver +* kube-scheduler +* kube-controller-manager + + +## Getting Up and Running Fast with Kube-Prometheus + +To help get started more quickly with monitoring Kubernetes clusters, [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) was created. It is a collection of manifests including dashboards and alerting rules that can easily be deployed. It utilizes the Prometheus Operator and all the manifests demonstrated in [this guide](../../../Documentation/user-guides/cluster-monitoring.md). + +This section represent a quick installation and is not intended to teach you about all the components. The easiest way to get started is to clone this repository and use the `kube-prometheus` section of the code. + +``` +git clone https://github.com/coreos/prometheus-operator +cd prometheus-operator/contrib/kube-prometheus/ +``` + +First, create the namespace in which you want the monitoring tool suite to be running. + +``` +export NAMESPACE='monitoring' +kubectl create namespace "$NAMESPACE" +``` + +Now we will create the components for the Prometheus operator + +``` +kubectl --namespace="$NAMESPACE" apply -f manifests/prometheus-operator +``` + +This will create all the Prometheus Operator components. You might need to wait a short amount of time before the Custom Resource Definitions are available in the cluster. You can wait for them: + +``` +until kubectl --namespace="$NAMESPACE" get alertmanagers.monitoring.coreos.com > /dev/null 2>&1; do sleep 1; printf "."; done +``` + +Next, we will install the node exporter and then kube-state-metrics: + +``` +kubectl --namespace="$NAMESPACE" apply -f manifests/node-exporter +kubectl --namespace="$NAMESPACE" apply -f manifests/kube-state-metrics +``` + +Then, we can deploy the grafana credentials. By default, the username/password will be `admin/admin`, you should change these for your production clusters. + +``` +kubectl --namespace="$NAMESPACE" apply -f manifests/grafana/grafana-credentials.yaml +``` + +Then install grafana itself: + +``` +kubectl --namespace="$NAMESPACE" apply -f manifests/grafana +``` + +Next up is the `Prometheus` object itself. We will deploy the application, and then the roles/role-bindings. + +``` +find manifests/prometheus -type f ! -name prometheus-k8s-roles.yaml ! -name prometheus-k8s-role-bindings.yaml -exec kubectl --namespace "$NAMESPACE" apply -f {} \; +kubectl apply -f manifests/prometheus/prometheus-k8s-roles.yaml +kubectl apply -f manifests/prometheus/prometheus-k8s-role-bindings.yaml +``` + +Finally, install the [Alertmanager](../../../Documentation/user-guides/alerting.md) + +``` +kubectl --namespace="$NAMESPACE" apply -f manifests/alertmanager +``` + +Now you should have a working cluster. After all the pods are ready, you should be able to reach: + +* Prometheus UI on node port `30900` +* Alertmanager UI on node port `30903` +* Grafana on node port `30902` + +These can of course be changed via the Service definitions. It is recommended to look at the [Exposing Prometheus and Alert Manager](../../../Documentation/user-guides/exposing-prometheus-and-alertmanager.md) documentation for more detailed information on how to expose these services. diff --git a/docs/monitoring-external-etcd.md b/docs/monitoring-external-etcd.md new file mode 100644 index 0000000000000000000000000000000000000000..1e26af0fdc2347a711311b6d18529a6571223074 --- /dev/null +++ b/docs/monitoring-external-etcd.md @@ -0,0 +1,64 @@ +# Monitoring external etcd +This guide will help you monitor an external etcd cluster. When the etcd cluster is not hosted inside Kubernetes. +This is often the case with Kubernetes setups. This approach has been tested with kube-aws but the same principals apply to other tools. + +Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [kube-prometheus-static-etcd.libsonnet](../jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following: + * Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret. + * Create the following (to expose etcd metrics - port 2379): a Service, Endpoint, & ServiceMonitor. + +# Step 1: Open the port + +You now need to allow the nodes Prometheus are running on to talk to the etcd on the port 2379 (if 2379 is the port used by etcd to expose the metrics) + +If using kube-aws, you will need to edit the etcd security group inbound, specifying the security group of your Kubernetes node (worker) as the source. + +## kube-aws and EIP or ENI inconsistency +With kube-aws, each etcd node has two IP addresses: + +* EC2 instance IP +* EIP or ENI (depending on the chosen method in yuour cluster.yaml) + +For some reason, some etcd node answer to :2379/metrics on the intance IP (eth0), some others on the EIP|ENI address (eth1). See issue https://github.com/kubernetes-incubator/kube-aws/issues/923 +It would be of course much better if we could hit the EPI/ENI all the time as they don't change even if the underlying EC2 intance goes down. +If specifying the Instance IP (eth0) in the Prometheus Operator ServiceMonitor, and the EC2 intance goes down, one would have to update the ServiceMonitor. + +Another idea woud be to use the DNS entries of etcd, but those are not currently supported for EndPoints objects in Kubernetes. + +# Step 2: verify + +Go to the Prometheus UI on :9090/config and check that you have an etcd job entry: +``` +- job_name: monitoring/etcd-k8s/0 + scrape_interval: 30s + scrape_timeout: 10s + ... +``` + +On the :9090/targets page: + * You should see "etcd" with the UP state. If not, check the Error column for more information. + * If no "etcd" targets are even shown on this page, prometheus isn't attempting to scrape it. + +# Step 3: Grafana dashboard + +## Find a dashboard you like + +Try to load this dashboard: +https://grafana.com/dashboards/3070 + +## Save the dashboard in the configmap + +As documented here, [Developing Alerts and Dashboards](developing-prometheus-rules-and-grafana-dashboards.md), the Grafana instances are stateless. The dashboards are automatically re-loaded from the ConfigMap. +So if you load a dashboard through the Grafana UI, it won't be kept unless saved in ConfigMap + +Read [the document](developing-prometheus-rules-and-grafana-dashboards.md), but in summary: + +### Copy your dashboard: +Once you are happy with the dashboard, export it and move it to `prometheus-operator/contrib/kube-prometheus/assets/grafana/` (ending in "-dashboard.json") + +### Regenetate the grafana dashboard manifest: +`hack/scripts/generate-dashboards-configmap.sh > manifests/grafana/grafana-dashboards.yaml` + +### Reload the manifest in Kubernetes: +` kubectl -n monitoring replace -f manifests/grafana/grafana-dashboards.yaml` + +After a few minutes your dasboard will be available permanently to all Grafana instances diff --git a/docs/monitoring-other-namespaces.md b/docs/monitoring-other-namespaces.md new file mode 100644 index 0000000000000000000000000000000000000000..8327ed024b31b2da6afbcdf0ab0fb84738232404 --- /dev/null +++ b/docs/monitoring-other-namespaces.md @@ -0,0 +1,28 @@ +# Monitoring other Kubernetes Namespaces +This guide will help you monitor applications in other Namespaces. By default the RBAC rules are only enabled for the `Default` and `kube-system` Namespace during Install. + +# Setup +You have to give the list of the Namespaces that you want to be able to monitor. +This is done in the variable `prometheus.roleSpecificNamespaces`. You usually set this in your `.jsonnet` file when building the manifests. + +Example to create the needed `Role` and `Rolebindig` for the Namespace `foo` : +``` +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + prometheus+:: { + namespaces: ["default", "kube-system","foo"], + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +``` diff --git a/example.jsonnet b/example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c446eac140558b7a2c1aaaef05e3c1a8f2467faa --- /dev/null +++ b/example.jsonnet @@ -0,0 +1,16 @@ +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + }; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + diff --git a/examples/additional-namespaces.jsonnet b/examples/additional-namespaces.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..957fd9124741ef75acf4541fe5bfaeea52e4b55d --- /dev/null +++ b/examples/additional-namespaces.jsonnet @@ -0,0 +1,17 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + prometheus+:: { + namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/alertmanager-config-external.jsonnet b/examples/alertmanager-config-external.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c2b34ccaa36892ce5a0fa28ff1947083d5d93167 --- /dev/null +++ b/examples/alertmanager-config-external.jsonnet @@ -0,0 +1,7 @@ +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + alertmanager+: { + config: importstr 'alertmanager-config.yaml', + }, + }, + }).alertmanager.secret diff --git a/examples/alertmanager-config.jsonnet b/examples/alertmanager-config.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..f08dbe19e333073d6553fda87bf5b4671787c0ac --- /dev/null +++ b/examples/alertmanager-config.jsonnet @@ -0,0 +1,22 @@ +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + alertmanager+: { + config: ||| + global: + resolve_timeout: 10m + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - match: + alertname: Watchdog + receiver: 'null' + receivers: + - name: 'null' + |||, + }, + }, + }).alertmanager.secret diff --git a/examples/alertmanager-config.yaml b/examples/alertmanager-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b341b55f169a71236547468fecabe42ce625551b --- /dev/null +++ b/examples/alertmanager-config.yaml @@ -0,0 +1,15 @@ +# external alertmanager yaml +global: + resolve_timeout: 10m +route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - match: + alertname: Watchdog + receiver: 'null' +receivers: +- name: 'null' diff --git a/examples/auth b/examples/auth new file mode 100644 index 0000000000000000000000000000000000000000..95692021a5214dc6a2dbf01fcd7e42bdf00f7e3e --- /dev/null +++ b/examples/auth @@ -0,0 +1,2 @@ +# This file should not ever be used, it's just a mock. +dontusethis:$apr1$heg6VIp7$1PSzJ/Z6fYboQ5pYrbgSy. diff --git a/examples/basic-auth/secrets.yaml b/examples/basic-auth/secrets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa0dd897c0d2a66c096dc33f08274fbba0197ad4 --- /dev/null +++ b/examples/basic-auth/secrets.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: basic-auth +data: + password: dG9vcg== # toor + user: YWRtaW4= # admin +type: Opaque \ No newline at end of file diff --git a/examples/basic-auth/service-monitor.yaml b/examples/basic-auth/service-monitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e62b9fa9c6705ba7075339bca37289dac68bedb6 --- /dev/null +++ b/examples/basic-auth/service-monitor.yaml @@ -0,0 +1,22 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-apps: basic-auth-example + name: basic-auth-example +spec: + endpoints: + - basicAuth: + password: + name: basic-auth + key: password + username: + name: basic-auth + key: user + port: metrics + namespaceSelector: + matchNames: + - logging + selector: + matchLabels: + app: myapp \ No newline at end of file diff --git a/examples/etcd-client-ca.crt b/examples/etcd-client-ca.crt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/examples/etcd-client.crt b/examples/etcd-client.crt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/examples/etcd-client.key b/examples/etcd-client.key new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/examples/etcd-skip-verify.jsonnet b/examples/etcd-skip-verify.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..603ba710e48a5cefd0c2c80df0796fb2e5ce5484 --- /dev/null +++ b/examples/etcd-skip-verify.jsonnet @@ -0,0 +1,22 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + + (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + etcd+:: { + ips: ['127.0.0.1'], + clientCA: importstr 'etcd-client-ca.crt', + clientKey: importstr 'etcd-client.key', + clientCert: importstr 'etcd-client.crt', + insecureSkipVerify: true, + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/etcd.jsonnet b/examples/etcd.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..e26c957142198c5cf3b4672fd871c6d217af44d0 --- /dev/null +++ b/examples/etcd.jsonnet @@ -0,0 +1,53 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + + (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + // Reference info: https://github.com/coreos/prometheus-operator/blob/master/contrib/kube-prometheus/README.md#static-etcd-configuration + etcd+:: { + // Configure this to be the IP(s) to scrape - i.e. your etcd node(s) (use commas to separate multiple values). + ips: ['127.0.0.1'], + + // Reference info: + // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitorspec (has endpoints) + // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#endpoint (has tlsConfig) + // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig (has: caFile, certFile, keyFile, serverName, & insecureSkipVerify) + + // Set these three variables to the fully qualified directory path on your work machine to the certificate files that are valid to scrape etcd metrics with (check the apiserver container). + // Most likely these certificates are generated somewhere in an infrastructure repository, so using the jsonnet `importstr` function can + // be useful here. (Kube-aws stores these three files inside the credential folder.) + // All the sensitive information on the certificates will end up in a Kubernetes Secret. + clientCA: importstr 'etcd-client-ca.crt', + clientKey: importstr 'etcd-client.key', + clientCert: importstr 'etcd-client.crt', + + // Note that you should specify a value EITHER for 'serverName' OR for 'insecureSkipVerify'. (Don't specify a value for both of them, and don't specify a value for neither of them.) + // * Specifying serverName: Ideally you should provide a valid value for serverName (and then insecureSkipVerify should be left as false - so that serverName gets used). + // * Specifying insecureSkipVerify: insecureSkipVerify is only to be used (i.e. set to true) if you cannot (based on how your etcd certificates were created) use a Subject Alternative Name. + // * If you specify a value: + // ** for both of these variables: When 'insecureSkipVerify: true' is specified, then also specifying a value for serverName won't hurt anything but it will be ignored. + // ** for neither of these variables: then you'll get authentication errors on the prom '/targets' page with your etcd targets. + + // A valid name (DNS or Subject Alternative Name) that the client (i.e. prometheus) will use to verify the etcd TLS certificate. + // * Note that doing `nslookup etcd.kube-system.svc.cluster.local` (on a pod in a K8s cluster where kube-prometheus has been installed) shows that kube-prometheus sets up this hostname. + // * `openssl x509 -noout -text -in etcd-client.pem` will print the Subject Alternative Names. + serverName: 'etcd.kube-system.svc.cluster.local', + + // When insecureSkipVerify isn't specified, the default value is "false". + //insecureSkipVerify: true, + + // In case you have generated the etcd certificate with kube-aws: + // * If you only have one etcd node, you can use the value from 'etcd.internalDomainName' (specified in your kube-aws cluster.yaml) as the value for 'serverName'. + // * But if you have multiple etcd nodes, you will need to use 'insecureSkipVerify: true' (if using default certificate generators method), as the valid certificate domain + // will be different for each etcd node. (kube-aws default certificates are not valid against the IP - they were created for the DNS.) + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/example-app/example-app.yaml b/examples/example-app/example-app.yaml new file mode 100644 index 0000000000000000000000000000000000000000..708e8afabb50fc4fd7d2c0fcf9623dd92fd328be --- /dev/null +++ b/examples/example-app/example-app.yaml @@ -0,0 +1,36 @@ +kind: Service +apiVersion: v1 +metadata: + name: example-app + labels: + tier: frontend + namespace: default +spec: + selector: + app: example-app + ports: + - name: web + protocol: TCP + port: 8080 + targetPort: web +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: example-app + namespace: default +spec: + replicas: 4 + template: + metadata: + labels: + app: example-app + version: 1.1.3 + spec: + containers: + - name: example-app + image: quay.io/fabxc/prometheus_demo_service + ports: + - name: web + containerPort: 8080 + protocol: TCP diff --git a/examples/example-app/prometheus-frontend-alertmanager-discovery-role-binding.yaml b/examples/example-app/prometheus-frontend-alertmanager-discovery-role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09b3f5e419edbb6353891f035e326d817aff19d0 --- /dev/null +++ b/examples/example-app/prometheus-frontend-alertmanager-discovery-role-binding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + name: prometheus-frontend + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: alertmanager-discovery +subjects: +- kind: ServiceAccount + name: prometheus-frontend + namespace: default diff --git a/examples/example-app/prometheus-frontend-alertmanager-discovery-role.yaml b/examples/example-app/prometheus-frontend-alertmanager-discovery-role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84319cdd1d855f182c46b91da5c45d6ffcda6651 --- /dev/null +++ b/examples/example-app/prometheus-frontend-alertmanager-discovery-role.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: Role +metadata: + name: alertmanager-discovery + namespace: monitoring +rules: +- apiGroups: [""] + resources: + - services + - endpoints + - pods + verbs: ["list", "watch"] diff --git a/examples/example-app/prometheus-frontend-role-binding.yaml b/examples/example-app/prometheus-frontend-role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d6bea2cfe055c21c25756e27cf81bc06eade291 --- /dev/null +++ b/examples/example-app/prometheus-frontend-role-binding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + name: prometheus-frontend + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-frontend +subjects: +- kind: ServiceAccount + name: prometheus-frontend + namespace: default diff --git a/examples/example-app/prometheus-frontend-role.yaml b/examples/example-app/prometheus-frontend-role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79d505011afa95f13ab4b919d04dcefee94743f4 --- /dev/null +++ b/examples/example-app/prometheus-frontend-role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: Role +metadata: + name: prometheus-frontend + namespace: default +rules: +- apiGroups: [""] + resources: + - nodes + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: + - configmaps + verbs: ["get"] diff --git a/examples/example-app/prometheus-frontend-service-account.yaml b/examples/example-app/prometheus-frontend-service-account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4dd7c26be6408de223ba3614a904cab3ac9d908c --- /dev/null +++ b/examples/example-app/prometheus-frontend-service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-frontend + namespace: default diff --git a/examples/example-app/prometheus-frontend-svc.yaml b/examples/example-app/prometheus-frontend-svc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7002e8f18a691e4fddbd9cd29ae9ec3fcf8f0188 --- /dev/null +++ b/examples/example-app/prometheus-frontend-svc.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus-frontend + namespace: default +spec: + type: NodePort + ports: + - name: web + nodePort: 30100 + port: 9090 + protocol: TCP + targetPort: web + selector: + prometheus: frontend diff --git a/examples/example-app/prometheus-frontend.yaml b/examples/example-app/prometheus-frontend.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d56515293e6db54fe99cbf91015ee596b3c04877 --- /dev/null +++ b/examples/example-app/prometheus-frontend.yaml @@ -0,0 +1,25 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + name: frontend + namespace: default + labels: + prometheus: frontend +spec: + serviceAccountName: prometheus-frontend + version: v1.7.1 + serviceMonitorSelector: + matchLabels: + tier: frontend + resources: + requests: + # 2Gi is default, but won't schedule if you don't have a node with >2Gi + # memory. Modify based on your target and time-series count for + # production use. This value is mainly meant for demonstration/testing + # purposes. + memory: 400Mi + alerting: + alertmanagers: + - namespace: monitoring + name: alertmanager-main + port: web diff --git a/examples/example-app/servicemonitor-frontend.yaml b/examples/example-app/servicemonitor-frontend.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bc935af75770096dac81820e544e29912737348 --- /dev/null +++ b/examples/example-app/servicemonitor-frontend.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: frontend + namespace: default + labels: + tier: frontend +spec: + selector: + matchLabels: + tier: frontend + targetLabels: + - tier + endpoints: + - port: web + interval: 10s + namespaceSelector: + matchNames: + - default diff --git a/examples/example-grafana-dashboard.json b/examples/example-grafana-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..a891040be1c044ad2fa596fdd0f95f9750df0318 --- /dev/null +++ b/examples/example-grafana-dashboard.json @@ -0,0 +1,177 @@ +{ + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetid": null, + "graphtooltip": 0, + "hidecontrols": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "height": "250px", + "panels": [ + { + "aliascolors": { + + }, + "bars": false, + "dashlength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridpos": { + + }, + "id": 2, + "legend": { + "alignastable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightside": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullpointmode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesoverrides": [ + + ], + "spacelength": 10, + "span": 6, + "stack": false, + "steppedline": false, + "targets": [ + { + "expr": "vector(1)", + "format": "time_series", + "intervalfactor": 2, + "legendformat": "", + "refid": "a" + } + ], + "thresholds": [ + + ], + "timefrom": null, + "timeshift": null, + "title": "my panel", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logbase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logbase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatiteration": null, + "repeatrowid": null, + "showtitle": false, + "title": "dashboard row", + "titlesize": "h6", + "type": "row" + } + ], + "schemaversion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "prometheus", + "value": "prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "my dashboard", + "version": 0 +} diff --git a/examples/existingrule.json b/examples/existingrule.json new file mode 100644 index 0000000000000000000000000000000000000000..41d6620b87749b68f9aa222b9582a27ac0469068 --- /dev/null +++ b/examples/existingrule.json @@ -0,0 +1 @@ +{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]} \ No newline at end of file diff --git a/examples/existingrule.yaml b/examples/existingrule.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a67032f96accb758e0ab108c1c72be037b7efe4 --- /dev/null +++ b/examples/existingrule.yaml @@ -0,0 +1,9 @@ +groups: +- name: example-group + rules: + - alert: Watchdog + expr: vector(1) + labels: + severity: "none" + annotations: + description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional. diff --git a/examples/grafana-additional-jsonnet-dashboard-example.jsonnet b/examples/grafana-additional-jsonnet-dashboard-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..578d6a1fe7eb53f0f84ba0227d1128ea355695e4 --- /dev/null +++ b/examples/grafana-additional-jsonnet-dashboard-example.jsonnet @@ -0,0 +1,45 @@ +local grafana = import 'grafonnet/grafana.libsonnet'; +local dashboard = grafana.dashboard; +local row = grafana.row; +local prometheus = grafana.prometheus; +local template = grafana.template; +local graphPanel = grafana.graphPanel; + +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + grafanaDashboards+:: { + 'my-dashboard.json': + dashboard.new('My Dashboard') + .addTemplate( + { + current: { + text: 'Prometheus', + value: 'Prometheus', + }, + hide: 0, + label: null, + name: 'datasource', + options: [], + query: 'prometheus', + refresh: 1, + regex: '', + type: 'datasource', + }, + ) + .addRow( + row.new() + .addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource') + .addTarget(prometheus.target('vector(1)'))) + ), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/grafana-additional-rendered-dashboard-example.jsonnet b/examples/grafana-additional-rendered-dashboard-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..8aa26bdc06459ea9b82bbab3f71a33e76a343ad7 --- /dev/null +++ b/examples/grafana-additional-rendered-dashboard-example.jsonnet @@ -0,0 +1,16 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + grafanaDashboards+:: { + 'my-dashboard.json': (import 'example-grafana-dashboard.json'), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/ingress.jsonnet b/examples/ingress.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..4aba212a85869656f8b9159b6ff3e1cd7abd7f2f --- /dev/null +++ b/examples/ingress.jsonnet @@ -0,0 +1,104 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local secret = k.core.v1.secret; +local ingress = k.extensions.v1beta1.ingress; +local ingressTls = ingress.mixin.spec.tlsType; +local ingressRule = ingress.mixin.spec.rulesType; +local httpIngressPath = ingressRule.mixin.http.pathsType; + +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + + { + _config+:: { + namespace: 'monitoring', + }, + // Configure External URL's per application + alertmanager+:: { + alertmanager+: { + spec+: { + externalUrl: 'http://alertmanager.example.com', + }, + }, + }, + grafana+:: { + config+: { + sections+: { + server+: { + root_url: 'http://grafana.example.com/', + }, + }, + }, + }, + prometheus+:: { + prometheus+: { + spec+: { + externalUrl: 'http://prometheus.example.com', + }, + }, + }, + // Create ingress objects per application + ingress+:: { + 'alertmanager-main': + ingress.new() + + ingress.mixin.metadata.withName('alertmanager-main') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.metadata.withAnnotations({ + 'nginx.ingress.kubernetes.io/auth-type': 'basic', + 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', + 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', + }) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost('alertmanager.example.com') + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.mixin.backend.withServiceName('alertmanager-main') + + httpIngressPath.mixin.backend.withServicePort('web') + ), + ), + grafana: + ingress.new() + + ingress.mixin.metadata.withName('grafana') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.metadata.withAnnotations({ + 'nginx.ingress.kubernetes.io/auth-type': 'basic', + 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', + 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', + }) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost('grafana.example.com') + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.mixin.backend.withServiceName('grafana') + + httpIngressPath.mixin.backend.withServicePort('http') + ), + ), + 'prometheus-k8s': + ingress.new() + + ingress.mixin.metadata.withName('prometheus-k8s') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.metadata.withAnnotations({ + 'nginx.ingress.kubernetes.io/auth-type': 'basic', + 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', + 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', + }) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost('prometheus.example.com') + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.mixin.backend.withServiceName('prometheus-k8s') + + httpIngressPath.mixin.backend.withServicePort('web') + ), + ), + }, + } + { + // Create basic auth secret - replace 'auth' file with your own + ingress+:: { + 'basic-auth-secret': + secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + + secret.mixin.metadata.withNamespace($._config.namespace), + }, + }; + +{ [name + '-ingress']: kp.ingress[name] for name in std.objectFields(kp.ingress) } diff --git a/examples/internal-registry.jsonnet b/examples/internal-registry.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..f1d1e8ac9e49774b16cc4b97a5803f67b0f790c2 --- /dev/null +++ b/examples/internal-registry.jsonnet @@ -0,0 +1,14 @@ +local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet'; +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, +} + mixin.withImageRepository('internal-registry.com/organization'); + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/jsonnet-build-snippet/build-snippet.jsonnet b/examples/jsonnet-build-snippet/build-snippet.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..5a11cef62c585a272a6929d3c8fde8d7b104b32e --- /dev/null +++ b/examples/jsonnet-build-snippet/build-snippet.jsonnet @@ -0,0 +1,7 @@ +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/jsonnet-snippets/bootkube.jsonnet b/examples/jsonnet-snippets/bootkube.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..f7386a012ab6e8594d6d833aee3264e5e40caf7d --- /dev/null +++ b/examples/jsonnet-snippets/bootkube.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') diff --git a/examples/jsonnet-snippets/kops-coredns.jsonnet b/examples/jsonnet-snippets/kops-coredns.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..6ba445dff751e10c8c52e2c7c57dd08eb43cd6e9 --- /dev/null +++ b/examples/jsonnet-snippets/kops-coredns.jsonnet @@ -0,0 +1,3 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') diff --git a/examples/jsonnet-snippets/kops.jsonnet b/examples/jsonnet-snippets/kops.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..4ff9ceaea1f8dca3b474d0cb5d1f4e6d895a71ea --- /dev/null +++ b/examples/jsonnet-snippets/kops.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kops.libsonnet') diff --git a/examples/jsonnet-snippets/kube-aws.jsonnet b/examples/jsonnet-snippets/kube-aws.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..b0842eb24e1dde19565afd6ae88ff52dab819b26 --- /dev/null +++ b/examples/jsonnet-snippets/kube-aws.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet') diff --git a/examples/jsonnet-snippets/kubeadm.jsonnet b/examples/jsonnet-snippets/kubeadm.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..a7837163e4ffe9cae140382eae63dde0d2395321 --- /dev/null +++ b/examples/jsonnet-snippets/kubeadm.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') diff --git a/examples/jsonnet-snippets/kubespray.jsonnet b/examples/jsonnet-snippets/kubespray.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..1665cf7285921d27f4e445c68087ff6cd8a175f4 --- /dev/null +++ b/examples/jsonnet-snippets/kubespray.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet') diff --git a/examples/jsonnet-snippets/node-ports.jsonnet b/examples/jsonnet-snippets/node-ports.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c02f1ae729edd7f8fb9df8f39d8a8f2a68c600c7 --- /dev/null +++ b/examples/jsonnet-snippets/node-ports.jsonnet @@ -0,0 +1,2 @@ +(import 'kube-prometheus/kube-prometheus.libsonnet') + +(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') diff --git a/examples/ksonnet-example.jsonnet b/examples/ksonnet-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..565d113fd1445359c180e0049e5ffbc155f11c14 --- /dev/null +++ b/examples/ksonnet-example.jsonnet @@ -0,0 +1,9 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local daemonset = k.apps.v1beta2.daemonSet; + +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + nodeExporter+: { + daemonset+: + daemonset.mixin.metadata.withNamespace('my-custom-namespace'), + }, + }).nodeExporter.daemonset diff --git a/examples/kustomize.jsonnet b/examples/kustomize.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..1b6b903865e34044e0beeca678b236e6231670af --- /dev/null +++ b/examples/kustomize.jsonnet @@ -0,0 +1,28 @@ +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + }; + +local manifests = + { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + + { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + + { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + + { ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + + { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }; + +local kustomizationResourceFile(name) = './manifests/' + name + '.yaml'; +local kustomization = { + apiVersion: 'kustomize.config.k8s.io/v1beta1', + kind: 'Kustomization', + resources: std.map(kustomizationResourceFile, std.objectFields(manifests)), +}; + +manifests { + '../kustomization': kustomization, +} + diff --git a/examples/minikube.jsonnet b/examples/minikube.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..3073612a4849c849d0e366c366795edbed243c7b --- /dev/null +++ b/examples/minikube.jsonnet @@ -0,0 +1,59 @@ +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + + (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') + + // Note that NodePort type services is likely not a good idea for your production use case, it is only used for demonstration purposes here. + (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') + + { + _config+:: { + namespace: 'monitoring', + alertmanager+:: { + config: importstr 'alertmanager-config.yaml', + }, + grafana+:: { + config: { // http://docs.grafana.org/installation/configuration/ + sections: { + // Do not require grafana users to login/authenticate + 'auth.anonymous': { enabled: true }, + }, + }, + }, + }, + + // For simplicity, each of the following values for 'externalUrl': + // * assume that `minikube ip` prints "192.168.99.100" + // * hard-code the NodePort for each app + prometheus+:: { + prometheus+: { + // Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec + spec+: { + // An e.g. of the purpose of this is so the "Source" links on http://<alert-manager>/#/alerts are valid. + externalUrl: 'http://192.168.99.100:30900', + + // Reference info: "external_labels" on https://prometheus.io/docs/prometheus/latest/configuration/configuration/ + externalLabels: { + // This 'cluster' label will be included on every firing prometheus alert. (This is more useful + // when running multiple clusters in a shared environment (e.g. AWS) with other users.) + cluster: 'minikube-<INSERT YOUR USERNAME HERE>', + }, + }, + }, + }, + alertmanager+:: { + alertmanager+: { + // Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec + spec+: { + externalUrl: 'http://192.168.99.100:30903', + + logLevel: 'debug', // So firing alerts show up in log + }, + }, + }, + }; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/prometheus-additional-alert-rule-example.jsonnet b/examples/prometheus-additional-alert-rule-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..622df03285bb2676f0edf3ffb2236e1220e16cbd --- /dev/null +++ b/examples/prometheus-additional-alert-rule-example.jsonnet @@ -0,0 +1,32 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + prometheusAlerts+:: { + groups+: [ + { + name: 'example-group', + rules: [ + { + alert: 'Watchdog', + expr: 'vector(1)', + labels: { + severity: 'none', + }, + annotations: { + description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.', + }, + }, + ], + }, + ], + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/prometheus-additional-recording-rule-example.jsonnet b/examples/prometheus-additional-recording-rule-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..7974e338c11cb84d0b60d17faa5f266fdcf5d44a --- /dev/null +++ b/examples/prometheus-additional-recording-rule-example.jsonnet @@ -0,0 +1,26 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + prometheusRules+:: { + groups+: [ + { + name: 'example-group', + rules: [ + { + record: 'some_recording_rule_name', + expr: 'vector(1)', + }, + ], + }, + ], + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/prometheus-additional-rendered-rule-example.jsonnet b/examples/prometheus-additional-rendered-rule-example.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..07ef0e50bbc10a66b15606fd58f754b889593c0c --- /dev/null +++ b/examples/prometheus-additional-rendered-rule-example.jsonnet @@ -0,0 +1,11 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + prometheusAlerts+:: (import 'existingrule.json'), +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/prometheus-name-override.jsonnet b/examples/prometheus-name-override.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..862180129d9f3440ccc158ea07e33bc440b22561 --- /dev/null +++ b/examples/prometheus-name-override.jsonnet @@ -0,0 +1,9 @@ +((import 'kube-prometheus/kube-prometheus.libsonnet') + { + prometheus+: { + prometheus+: { + metadata+: { + name: 'my-name', + }, + }, + }, + }).prometheus.prometheus diff --git a/examples/prometheus-pvc.jsonnet b/examples/prometheus-pvc.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..82716e0f514ec3ff2b76abc2ce45f8646bbe19c1 --- /dev/null +++ b/examples/prometheus-pvc.jsonnet @@ -0,0 +1,58 @@ +// Reference info: documentation for https://github.com/ksonnet/ksonnet-lib can be found at http://g.bryan.dev.hepti.center +// +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; // https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k.libsonnet - imports k8s.libsonnet +// * https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k8s.libsonnet defines things such as "persistentVolumeClaim:: {" +// +local pvc = k.core.v1.persistentVolumeClaim; // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#persistentvolumeclaim-v1-core (defines variable named 'spec' of type 'PersistentVolumeClaimSpec') + +local kp = + (import 'kube-prometheus/kube-prometheus.libsonnet') + + (import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') + + { + _config+:: { + namespace: 'monitoring', + }, + + prometheus+:: { + prometheus+: { + spec+: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec + // If a value isn't specified for 'retention', then by default the '--storage.tsdb.retention=24h' arg will be passed to prometheus by prometheus-operator. + // The possible values for a prometheus <duration> are: + // * https://github.com/prometheus/common/blob/c7de230/model/time.go#L178 specifies "^([0-9]+)(y|w|d|h|m|s|ms)$" (years weeks days hours minutes seconds milliseconds) + retention: '30d', + + // Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md + // By default (if the following 'storage.volumeClaimTemplate' isn't created), prometheus will be created with an EmptyDir for the 'prometheus-k8s-db' volume (for the prom tsdb). + // This 'storage.volumeClaimTemplate' causes the following to be automatically created (via dynamic provisioning) for each prometheus pod: + // * PersistentVolumeClaim (and a corresponding PersistentVolume) + // * the actual volume (per the StorageClassName specified below) + storage: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#storagespec + volumeClaimTemplate: // (same link as above where the 'pvc' variable is defined) + pvc.new() + // http://g.bryan.dev.hepti.center/core/v1/persistentVolumeClaim/#core.v1.persistentVolumeClaim.new + + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + + + // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#resourcerequirements-v1-core (defines 'requests'), + // and https://kubernetes.io/docs/concepts/policy/resource-quotas/#storage-resource-quota (defines 'requests.storage') + pvc.mixin.spec.resources.withRequests({ storage: '100Gi' }) + + + // A StorageClass of the following name (which can be seen via `kubectl get storageclass` from a node in the given K8s cluster) must exist prior to kube-prometheus being deployed. + pvc.mixin.spec.withStorageClassName('ssd'), + + // The following 'selector' is only needed if you're using manual storage provisioning (https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md#manual-storage-provisioning). + // And note that this is not supported/allowed by AWS - uncommenting the following 'selector' line (when deploying kube-prometheus to a K8s cluster in AWS) will cause the pvc to be stuck in the Pending status and have the following error: + // * 'Failed to provision volume with StorageClass "ssd": claim.Spec.Selector is not supported for dynamic provisioning on AWS' + //pvc.mixin.spec.selector.withMatchLabels({}), + }, // storage + }, // spec + }, // prometheus + }, // prometheus + + }; + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/experimental/custom-metrics-api/.gitignore b/experimental/custom-metrics-api/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..794c008cdd9c9bb7e6974033f6c3e1039ef96906 --- /dev/null +++ b/experimental/custom-metrics-api/.gitignore @@ -0,0 +1,7 @@ +apiserver-key.pem +apiserver.csr +apiserver.pem +metrics-ca-config.json +metrics-ca.crt +metrics-ca.key +cm-adapter-serving-certs.yaml diff --git a/experimental/custom-metrics-api/README.md b/experimental/custom-metrics-api/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e93d809f36fb66b7d7906e52065b048e6155d9a5 --- /dev/null +++ b/experimental/custom-metrics-api/README.md @@ -0,0 +1,21 @@ +# Custom Metrics API + +The custom metrics API allows the HPA v2 to scale based on arbirary metrics. + +This directory contains an example deployment which extends the Prometheus Adapter, deployed with kube-prometheus, serve the [Custom Metrics API](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/instrumentation/custom-metrics-api.md) by talking to Prometheus running inside the cluster. + +Make sure you have the Prometheus Adapter up and running in the `monitoring` namespace. + +You can deploy everything in the `monitoring` namespace using `./deploy.sh`. + +When you're done, you can teardown using the `./teardown.sh` script. + +### Sample App + +Additionally, this directory contains a sample app that uses the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) to scale the Deployment's replicas of Pods up and down as needed. +Deploy this app by running `kubectl apply -f sample-app.yaml`. +Make the app accessible on your system, for example by using `kubectl port-forward svc/sample-app 8080`. Next you need to put some load on its http endpoints. + +A tool like [hey](https://github.com/rakyll/hey) is helpful for doing so: `hey -c 20 -n 100000000 http://localhost:8080/metrics` + +There is an even more detailed information on this sample app at [luxas/kubeadm-workshop](https://github.com/luxas/kubeadm-workshop#deploying-the-prometheus-operator-for-monitoring-services-in-the-cluster). diff --git a/experimental/custom-metrics-api/custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml b/experimental/custom-metrics-api/custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2b1ca43e2d69d82d443118ec1744259a6551d5e --- /dev/null +++ b/experimental/custom-metrics-api/custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: custom-metrics-server-resources +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: custom-metrics-server-resources +subjects: +- kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/experimental/custom-metrics-api/custom-metrics-apiservice.yaml b/experimental/custom-metrics-api/custom-metrics-apiservice.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98f874958752a68fc1e924addb00e9193b2165fd --- /dev/null +++ b/experimental/custom-metrics-api/custom-metrics-apiservice.yaml @@ -0,0 +1,13 @@ +apiVersion: apiregistration.k8s.io/v1beta1 +kind: APIService +metadata: + name: v1beta1.custom.metrics.k8s.io +spec: + service: + name: prometheus-adapter + namespace: monitoring + group: custom.metrics.k8s.io + version: v1beta1 + insecureSkipTLSVerify: true + groupPriorityMinimum: 100 + versionPriority: 100 diff --git a/experimental/custom-metrics-api/custom-metrics-cluster-role.yaml b/experimental/custom-metrics-api/custom-metrics-cluster-role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..003f0bf15f2881ab8052b150c7b5f27f4c932015 --- /dev/null +++ b/experimental/custom-metrics-api/custom-metrics-cluster-role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: custom-metrics-server-resources +rules: +- apiGroups: + - custom.metrics.k8s.io + resources: ["*"] + verbs: ["*"] diff --git a/experimental/custom-metrics-api/custom-metrics-configmap.yaml b/experimental/custom-metrics-api/custom-metrics-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e209cc3c61f085d1611e1ec6481829c3c0bad1f --- /dev/null +++ b/experimental/custom-metrics-api/custom-metrics-configmap.yaml @@ -0,0 +1,98 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: adapter-config + namespace: monitoring +data: + config.yaml: | + rules: + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: [] + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: + - isNot: ^container_.*_seconds_total$ + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: + - isNot: ^container_.*_total$ + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)$ + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_total$ + resources: + template: <<.Resource>> + name: + matches: "" + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_seconds_total + resources: + template: <<.Resource>> + name: + matches: ^(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: [] + resources: + template: <<.Resource>> + name: + matches: ^(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) + resourceRules: + cpu: + containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) + nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[1m])) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + memory: + containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>) + nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + window: 1m diff --git a/experimental/custom-metrics-api/deploy.sh b/experimental/custom-metrics-api/deploy.sh new file mode 100644 index 0000000000000000000000000000000000000000..d276afc02148533af05608cedc6e79b6f58ae66e --- /dev/null +++ b/experimental/custom-metrics-api/deploy.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +kubectl apply -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml +kubectl apply -n monitoring -f custom-metrics-apiservice.yaml +kubectl apply -n monitoring -f custom-metrics-cluster-role.yaml +kubectl apply -n monitoring -f custom-metrics-configmap.yaml +kubectl apply -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml diff --git a/experimental/custom-metrics-api/hpa-custom-metrics-cluster-role-binding.yaml b/experimental/custom-metrics-api/hpa-custom-metrics-cluster-role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..530ebea5e56c5703e1879b7a16e0b005eb4c9479 --- /dev/null +++ b/experimental/custom-metrics-api/hpa-custom-metrics-cluster-role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: hpa-controller-custom-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: custom-metrics-server-resources +subjects: +- kind: ServiceAccount + name: horizontal-pod-autoscaler + namespace: kube-system diff --git a/experimental/custom-metrics-api/sample-app.yaml b/experimental/custom-metrics-api/sample-app.yaml new file mode 100644 index 0000000000000000000000000000000000000000..470887c65d8bec2d2fe5273105a857481507b222 --- /dev/null +++ b/experimental/custom-metrics-api/sample-app.yaml @@ -0,0 +1,67 @@ +kind: ServiceMonitor +apiVersion: monitoring.coreos.com/v1 +metadata: + name: sample-app + labels: + app: sample-app +spec: + selector: + matchLabels: + app: sample-app + endpoints: + - port: http + interval: 5s +--- +apiVersion: v1 +kind: Service +metadata: + name: sample-app + labels: + app: sample-app +spec: + ports: + - name: http + port: 8080 + targetPort: 8080 + selector: + app: sample-app +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sample-app + labels: + app: sample-app +spec: + replicas: 1 + selector: + matchLabels: + app: sample-app + template: + metadata: + labels: + app: sample-app + spec: + containers: + - image: luxas/autoscale-demo:v0.1.2 + name: metrics-provider + ports: + - name: http + containerPort: 8080 +--- +kind: HorizontalPodAutoscaler +apiVersion: autoscaling/v2beta1 +metadata: + name: sample-app +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: sample-app + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Pods + pods: + metricName: http_requests + targetAverageValue: 500m diff --git a/experimental/custom-metrics-api/teardown.sh b/experimental/custom-metrics-api/teardown.sh new file mode 100644 index 0000000000000000000000000000000000000000..b3a455f596e55e0ba7b53c7e5f6a981e39852ef3 --- /dev/null +++ b/experimental/custom-metrics-api/teardown.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +kubectl delete -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml +kubectl delete -n monitoring -f custom-metrics-apiservice.yaml +kubectl delete -n monitoring -f custom-metrics-cluster-role.yaml +kubectl delete -n monitoring -f custom-metrics-configmap.yaml +kubectl delete -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml diff --git a/experimental/metrics-server/auth-delegator.yaml b/experimental/metrics-server/auth-delegator.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04826aec5da9ce6effb269756f29417b26eb41cc --- /dev/null +++ b/experimental/metrics-server/auth-delegator.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: metrics-server:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: kube-system diff --git a/experimental/metrics-server/auth-reader.yaml b/experimental/metrics-server/auth-reader.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ab6a6a365940c82bc576438e3fb79de2ec7780a --- /dev/null +++ b/experimental/metrics-server/auth-reader.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + name: metrics-server-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: kube-system diff --git a/experimental/metrics-server/metrics-apiservice.yaml b/experimental/metrics-server/metrics-apiservice.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8860fbcc1e82891aeb4f925d84b1b00a2fd14fe --- /dev/null +++ b/experimental/metrics-server/metrics-apiservice.yaml @@ -0,0 +1,13 @@ +apiVersion: apiregistration.k8s.io/v1beta1 +kind: APIService +metadata: + name: v1beta1.metrics.k8s.io +spec: + service: + name: metrics-server + namespace: kube-system + group: metrics.k8s.io + version: v1beta1 + insecureSkipTLSVerify: true + groupPriorityMinimum: 100 + versionPriority: 100 diff --git a/experimental/metrics-server/metrics-server-cluster-role-binding.yaml b/experimental/metrics-server/metrics-server-cluster-role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc63434562bd01afc91286c1271994379a427c8d --- /dev/null +++ b/experimental/metrics-server/metrics-server-cluster-role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:metrics-server +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:metrics-server +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: kube-system diff --git a/experimental/metrics-server/metrics-server-cluster-role.yaml b/experimental/metrics-server/metrics-server-cluster-role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38844d9a69d82ed6eed956d14467becd10b91714 --- /dev/null +++ b/experimental/metrics-server/metrics-server-cluster-role.yaml @@ -0,0 +1,24 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:metrics-server +rules: +- apiGroups: + - "" + resources: + - pods + - nodes + - nodes/stats + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "extensions" + resources: + - deployments + verbs: + - get + - list + - watch diff --git a/experimental/metrics-server/metrics-server-deployment.yaml b/experimental/metrics-server/metrics-server-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..386740dac39554becdafeb85e56201607f763729 --- /dev/null +++ b/experimental/metrics-server/metrics-server-deployment.yaml @@ -0,0 +1,25 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: metrics-server + namespace: kube-system + labels: + k8s-app: metrics-server +spec: + selector: + matchLabels: + k8s-app: metrics-server + template: + metadata: + name: metrics-server + labels: + k8s-app: metrics-server + spec: + serviceAccountName: metrics-server + containers: + - name: metrics-server + image: gcr.io/google_containers/metrics-server-amd64:v0.2.0 + imagePullPolicy: Always + command: + - /metrics-server + - --source=kubernetes.summary_api:'' diff --git a/experimental/metrics-server/metrics-server-service-account.yaml b/experimental/metrics-server/metrics-server-service-account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee205aa61f171a76e8605e2df11b9539d6eea2ae --- /dev/null +++ b/experimental/metrics-server/metrics-server-service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: metrics-server + namespace: kube-system diff --git a/experimental/metrics-server/metrics-server-service.yaml b/experimental/metrics-server/metrics-server-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..974628e05320c9bc7af6a2760cca7fd365cedee9 --- /dev/null +++ b/experimental/metrics-server/metrics-server-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: metrics-server + namespace: kube-system + labels: + kubernetes.io/name: "Metrics-server" +spec: + selector: + k8s-app: metrics-server + ports: + - port: 443 + protocol: TCP + targetPort: 443 diff --git a/grafana-image/Dockerfile b/grafana-image/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..fdbef2895290b507ed743844e436565875584b6a --- /dev/null +++ b/grafana-image/Dockerfile @@ -0,0 +1,17 @@ +FROM debian:9.3-slim + +ARG GRAFANA_VERSION + +RUN apt-get update && apt-get install -qq -y wget tar sqlite && \ + wget -O /tmp/grafana.tar.gz https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-$GRAFANA_VERSION.linux-x64.tar.gz && \ + tar -zxvf /tmp/grafana.tar.gz -C /tmp && mv /tmp/grafana-$GRAFANA_VERSION /grafana && \ + rm -rf /tmp/grafana.tar.gz + +ADD config.ini /grafana/conf/config.ini + +USER nobody +EXPOSE 3000 +VOLUME [ "/data" ] +WORKDIR /grafana +ENTRYPOINT [ "/grafana/bin/grafana-server" ] +CMD [ "-config=/grafana/conf/config.ini" ] diff --git a/grafana-image/Makefile b/grafana-image/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8df556c667cdc7fde05a73c0018bcd41969de72a --- /dev/null +++ b/grafana-image/Makefile @@ -0,0 +1,5 @@ +VERSION=5.0.3 +IMAGE_TAG=$(VERSION) + +container: + docker build --build-arg GRAFANA_VERSION=$(VERSION) -t quay.io/coreos/monitoring-grafana:$(IMAGE_TAG) . diff --git a/grafana-image/config.ini b/grafana-image/config.ini new file mode 100644 index 0000000000000000000000000000000000000000..14e0c7adc25e9f9d26a05252062bbde103accff5 --- /dev/null +++ b/grafana-image/config.ini @@ -0,0 +1,16 @@ +[database] +path = /data/grafana.db + +[paths] +data = /data +logs = /data/log +plugins = /data/plugins + +[session] +provider = memory + +[auth.basic] +enabled = false + +[auth.anonymous] +enabled = true diff --git a/hack/example-service-monitoring/deploy b/hack/example-service-monitoring/deploy new file mode 100755 index 0000000000000000000000000000000000000000..0c7cd7c1a4632240478354f51f3aaead0ff0f990 --- /dev/null +++ b/hack/example-service-monitoring/deploy @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# exit immediately when a command fails +set -e +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail +# error on unset variables +set -u + +kubectl apply -f examples/example-app diff --git a/hack/example-service-monitoring/teardown b/hack/example-service-monitoring/teardown new file mode 100755 index 0000000000000000000000000000000000000000..1a49f4625cf5378b091762900c0d791e46a93095 --- /dev/null +++ b/hack/example-service-monitoring/teardown @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# exit immediately when a command fails +set -e +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail +# error on unset variables +set -u + +kubectl delete -f examples/example-app diff --git a/jsonnet/kube-prometheus/.gitignore b/jsonnet/kube-prometheus/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..52a75ecba7d108a48c1a496227a324b28cb0fdbe --- /dev/null +++ b/jsonnet/kube-prometheus/.gitignore @@ -0,0 +1,2 @@ +jsonnetfile.lock.json +vendor/ diff --git a/jsonnet/kube-prometheus/alertmanager/alertmanager.libsonnet b/jsonnet/kube-prometheus/alertmanager/alertmanager.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c46a87dcbc20f97bb1275d02eb7b0b5eaf8fae72 --- /dev/null +++ b/jsonnet/kube-prometheus/alertmanager/alertmanager.libsonnet @@ -0,0 +1,125 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +{ + _config+:: { + namespace: 'default', + + versions+:: { + alertmanager: 'v0.16.1', + }, + + imageRepos+:: { + alertmanager: 'quay.io/prometheus/alertmanager', + }, + + alertmanager+:: { + name: $._config.alertmanager.name, + config: { + global: { + resolve_timeout: '5m', + }, + route: { + group_by: ['job'], + group_wait: '30s', + group_interval: '5m', + repeat_interval: '12h', + receiver: 'null', + routes: [ + { + receiver: 'null', + match: { + alertname: 'Watchdog', + }, + }, + ], + }, + receivers: [ + { + name: 'null', + }, + ], + }, + replicas: 3, + }, + }, + + alertmanager+:: { + secret: + local secret = k.core.v1.secret; + + if std.type($._config.alertmanager.config) == 'object' then + secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64(std.manifestYamlDoc($._config.alertmanager.config)) }) + + secret.mixin.metadata.withNamespace($._config.namespace) + else + secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64($._config.alertmanager.config) }) + + secret.mixin.metadata.withNamespace($._config.namespace), + + serviceAccount: + local serviceAccount = k.core.v1.serviceAccount; + + serviceAccount.new('alertmanager-' + $._config.alertmanager.name) + + serviceAccount.mixin.metadata.withNamespace($._config.namespace), + + service: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + local alertmanagerPort = servicePort.newNamed('web', 9093, 'web'); + + service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) + + service.mixin.spec.withSessionAffinity('ClientIP') + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }), + + serviceMonitor: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'alertmanager', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'alertmanager', + }, + }, + spec: { + selector: { + matchLabels: { + alertmanager: $._config.alertmanager.name, + }, + }, + endpoints: [ + { + port: 'web', + interval: '30s', + }, + ], + }, + }, + + alertmanager: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'Alertmanager', + metadata: { + name: $._config.alertmanager.name, + namespace: $._config.namespace, + labels: { + alertmanager: $._config.alertmanager.name, + }, + }, + spec: { + replicas: $._config.alertmanager.replicas, + version: $._config.versions.alertmanager, + baseImage: $._config.imageRepos.alertmanager, + nodeSelector: { 'beta.kubernetes.io/os': 'linux' }, + serviceAccountName: 'alertmanager-' + $._config.alertmanager.name, + securityContext: { + runAsUser: 1000, + runAsNonRoot: true, + fsGroup: 2000, + }, + }, + }, + }, +} diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..bf58862dd26a16b718a2b3d931faa0ef2819d744 --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet @@ -0,0 +1,52 @@ +{ + prometheusAlerts+:: { + groups+: [ + { + name: 'alertmanager.rules', + rules: [ + { + alert: 'AlertmanagerConfigInconsistent', + annotations: { + message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', + }, + expr: ||| + count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'AlertmanagerFailedReload', + annotations: { + message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.", + }, + expr: ||| + alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert:'AlertmanagerMembersInconsistent', + annotations:{ + message: 'Alertmanager has not found all other members of the cluster.', + }, + expr: ||| + alertmanager_cluster_members{%(alertmanagerSelector)s} + != on (service) GROUP_LEFT() + count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s}) + ||| % $._config, + 'for': '5m', + labels: { + severity: 'critical', + }, + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/alerts/alerts.libsonnet b/jsonnet/kube-prometheus/alerts/alerts.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..1b2d94eb2c207e30fd9b92db70374971e15bc05f --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/alerts.libsonnet @@ -0,0 +1,5 @@ +(import 'alertmanager.libsonnet') + +(import 'general.libsonnet') + +(import 'node.libsonnet') + +(import 'prometheus.libsonnet') + +(import 'prometheus-operator.libsonnet') diff --git a/jsonnet/kube-prometheus/alerts/general.libsonnet b/jsonnet/kube-prometheus/alerts/general.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..8802097e071f81fdb6cb83f842fc292937cbbdb7 --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/general.libsonnet @@ -0,0 +1,38 @@ +{ + prometheusAlerts+:: { + groups+: [ + { + name: 'general.rules', + rules: [ + { + alert: 'TargetDown', + annotations: { + message: '{{ $value }}% of the {{ $labels.job }} targets are down.', + }, + expr: '100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10', + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'Watchdog', + annotations: { + message: ||| + This is an alert meant to ensure that the entire alerting pipeline is functional. + This alert is always firing, therefore it should always be firing in Alertmanager + and always fire against a receiver. There are integrations with various notification + mechanisms that send a notification when this alert is not firing. For example the + "DeadMansSnitch" integration in PagerDuty. + |||, + }, + expr: 'vector(1)', + labels: { + severity: 'none', + }, + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/alerts/node.libsonnet b/jsonnet/kube-prometheus/alerts/node.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..3dca1b0aad0c9b42d2fcdb30fcf32a32fa13f99d --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/node.libsonnet @@ -0,0 +1,112 @@ +{ + prometheusAlerts+:: { + groups+: [ + { + name: 'kube-prometheus-node-alerting.rules', + rules: [ + { + alert: 'NodeDiskRunningFull', + annotations: { + message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 24 hours.', + }, + expr: ||| + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0) + ||| % $._config, + 'for': '30m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'NodeDiskRunningFull', + annotations: { + message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 2 hours.', + }, + expr: ||| + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0) + ||| % $._config, + 'for': '10m', + labels: { + severity: 'critical', + }, + }, + ], + }, + { + name: 'node-time', + rules: [ + { + alert: 'ClockSkewDetected', + annotations: { + message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.', + }, + expr: ||| + node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03 + ||| % $._config, + 'for': '2m', + labels: { + severity: 'warning', + }, + }, + ], + }, + { + name: 'node-network', + rules: [ + { + alert: 'NetworkReceiveErrors', + annotations: { + message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"', + }, + expr: ||| + rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0 + ||| % $._config, + 'for': '2m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'NetworkTransmitErrors', + annotations: { + message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"', + }, + expr: ||| + rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0 + ||| % $._config, + 'for': '2m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'NodeNetworkInterfaceDown', + annotations: { + message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"', + }, + expr: ||| + node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0 + ||| % $._config, + 'for': '2m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'NodeNetworkInterfaceFlapping', + annotations: { + message: 'Network interface "{{ $labels.device }}" changing it\'s up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"', + }, + expr: ||| + changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2 + ||| % $._config, + 'for': '2m', + labels: { + severity: 'warning', + }, + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet b/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..a430c5050a26abbbc3e2a3e8b72197aeaafd8061 --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet @@ -0,0 +1,37 @@ +{ + prometheusAlerts+:: { + groups+: [ + { + name: 'prometheus-operator', + rules: [ + { + alert: 'PrometheusOperatorReconcileErrors', + expr: ||| + rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 + ||| % $._config, + labels: { + severity: 'warning', + }, + annotations: { + message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.', + }, + 'for': '10m', + }, + { + alert: 'PrometheusOperatorNodeLookupErrors', + expr: ||| + rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 + ||| % $._config, + labels: { + severity: 'warning', + }, + annotations: { + message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.', + }, + 'for': '10m', + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/alerts/prometheus.libsonnet b/jsonnet/kube-prometheus/alerts/prometheus.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..a2d0cc67af8a56218028193bcd8d037f74c5e83a --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/prometheus.libsonnet @@ -0,0 +1,151 @@ +{ + prometheusAlerts+:: { + groups+: [ + { + name: 'prometheus.rules', + rules: [ + { + alert: 'PrometheusConfigReloadFailed', + annotations: { + description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}", + summary: "Reloading Prometheus' configuration failed", + }, + expr: ||| + prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusNotificationQueueRunningFull', + annotations: { + description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}", + summary: "Prometheus' alert notification queue is running full", + }, + expr: ||| + predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s} + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusErrorSendingAlerts', + annotations: { + description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}', + summary: 'Errors while sending alert from Prometheus', + }, + expr: ||| + rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusErrorSendingAlerts', + annotations: { + description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}', + summary: 'Errors while sending alerts from Prometheus', + }, + expr: ||| + rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PrometheusNotConnectedToAlertmanagers', + annotations: { + description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers', + summary: 'Prometheus is not connected to any Alertmanagers', + }, + expr: ||| + prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusTSDBReloadsFailing', + annotations: { + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last four hours.', + summary: 'Prometheus has issues reloading data blocks from disk', + }, + expr: ||| + increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0 + ||| % $._config, + 'for': '12h', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusTSDBCompactionsFailing', + annotations: { + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last four hours.', + summary: 'Prometheus has issues compacting sample blocks', + }, + expr: ||| + increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0 + ||| % $._config, + 'for': '12h', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusTSDBWALCorruptions', + annotations: { + description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).', + summary: 'Prometheus write-ahead log is corrupted', + }, + expr: ||| + prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0 + ||| % $._config, + 'for': '4h', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusNotIngestingSamples', + annotations: { + description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.", + summary: "Prometheus isn't ingesting samples", + }, + expr: ||| + rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PrometheusTargetScrapesDuplicate', + annotations: { + description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values', + summary: 'Prometheus has many samples rejected', + }, + expr: ||| + increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/alerts/tests.yaml b/jsonnet/kube-prometheus/alerts/tests.yaml new file mode 100644 index 0000000000000000000000000000000000000000..532bb895561757e0024a66ef045dbce11f90afc8 --- /dev/null +++ b/jsonnet/kube-prometheus/alerts/tests.yaml @@ -0,0 +1,157 @@ +# TODO(metalmatze): This file is temporarily saved here for later reference +# until we find out how to integrate the tests into our jsonnet stack. + +rule_files: + - rules.yaml + +evaluation_interval: 1m + +tests: + - interval: 1m + input_series: + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}' + values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0' + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}' + values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3' + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}' + values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3' + alert_rule_test: + - eval_time: 5m + alertname: AlertmanagerMembersInconsistent + - eval_time: 11m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - eval_time: 17m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - eval_time: 23m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - interval: 1m + input_series: + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}' + values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}' + values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2' + - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}' + values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2' + alert_rule_test: + - eval_time: 5m + alertname: AlertmanagerMembersInconsistent + - eval_time: 11m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.1 + namespace: monitoring + pod: alertmanager-main-1 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.2 + namespace: monitoring + pod: alertmanager-main-2 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - eval_time: 17m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.1 + namespace: monitoring + pod: alertmanager-main-1 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.2 + namespace: monitoring + pod: alertmanager-main-2 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - eval_time: 23m + alertname: AlertmanagerMembersInconsistent + exp_alerts: + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.0 + namespace: monitoring + pod: alertmanager-main-0 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.1 + namespace: monitoring + pod: alertmanager-main-1 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' + - exp_labels: + service: 'alertmanager-main' + severity: critical + job: 'alertmanager-main' + instance: 10.10.10.2 + namespace: monitoring + pod: alertmanager-main-2 + exp_annotations: + message: 'Alertmanager has not found all other members of the cluster.' diff --git a/jsonnet/kube-prometheus/jsonnetfile.json b/jsonnet/kube-prometheus/jsonnetfile.json new file mode 100644 index 0000000000000000000000000000000000000000..5f481c36e51de27645df8298e76b366705c2024d --- /dev/null +++ b/jsonnet/kube-prometheus/jsonnetfile.json @@ -0,0 +1,54 @@ +{ + "dependencies": [ + { + "name": "ksonnet", + "source": { + "git": { + "remote": "https://github.com/ksonnet/ksonnet-lib", + "subdir": "" + } + }, + "version": "master" + }, + { + "name": "kubernetes-mixin", + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "subdir": "" + } + }, + "version": "master" + }, + { + "name": "grafana", + "source": { + "git": { + "remote": "https://github.com/brancz/kubernetes-grafana", + "subdir": "grafana" + } + }, + "version": "master" + }, + { + "name": "prometheus-operator", + "source": { + "git": { + "remote": "https://github.com/coreos/prometheus-operator", + "subdir": "jsonnet/prometheus-operator" + } + }, + "version": "v0.29.0" + }, + { + "name": "etcd-mixin", + "source": { + "git": { + "remote": "https://github.com/coreos/etcd", + "subdir": "Documentation/etcd-mixin" + } + }, + "version": "master" + } + ] +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..6956e3db64ec4ff6926b985cd526a9ab7a601766 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet @@ -0,0 +1,39 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local statefulSet = k.apps.v1beta2.statefulSet; +local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType; +local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType; + +{ + local antiaffinity(key, values) = { + affinity: { + podAntiAffinity: { + preferredDuringSchedulingIgnoredDuringExecution: [ + affinity.new() + + affinity.withWeight(100) + + affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) + + affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') + + affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([ + matchExpression.new() + + matchExpression.withKey(key) + + matchExpression.withOperator('In') + + matchExpression.withValues(values), + ]), + ], + }, + }, + }, + + alertmanager+:: { + alertmanager+: { + spec+: + antiaffinity('alertmanager', [$._config.alertmanager.name]), + }, + }, + + prometheus+: { + prometheus+: { + spec+: + antiaffinity('prometheus', [$._config.prometheus.name]), + }, + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-bootkube.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-bootkube.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..a9cf3bb32c74d6086be838f668fae2655904ed19 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-bootkube.libsonnet @@ -0,0 +1,23 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+:: { + kubeControllerManagerPrometheusDiscoveryService: + service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + + service.mixin.spec.withClusterIp('None'), + kubeSchedulerPrometheusDiscoveryService: + service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + + service.mixin.spec.withClusterIp('None'), + kubeDnsPrometheusDiscoveryService: + service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-config-mixins.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-config-mixins.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..ad2784076d0624b86a191cc8b8a82bb7cace31f9 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-config-mixins.libsonnet @@ -0,0 +1,20 @@ +local l = import 'lib/lib.libsonnet'; + +// withImageRepository is a mixin that replaces all images prefixes by repository. eg. +// quay.io/coreos/addon-resizer -> $repository/addon-resizer +// grafana/grafana -> grafana $repository/grafana +local withImageRepository(repository) = { + local oldRepos = super._config.imageRepos, + local substituteRepository(image, repository) = + if repository == null then image else repository + '/' + l.imageName(image), + _config+:: { + imageRepos:: { + [field]: substituteRepository(oldRepos[field], repository), + for field in std.objectFields(oldRepos) + } + }, +}; + +{ + withImageRepository:: withImageRepository, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..1bd64e1b3b21ab84bd79b2ebb2a86d3e24d0ab07 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet @@ -0,0 +1,25 @@ +{ + prometheus+:: { + serviceMonitorKubelet+: + { + spec+: { + endpoints: [ + { + port: 'http-metrics', + scheme: 'http', + interval: '30s', + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + }, + { + port: 'http-metrics', + scheme: 'http', + path: '/metrics/cadvisor', + interval: '30s', + honorLabels: true, + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + }, + ], + }, + }, + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-kops-coredns.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-kops-coredns.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..4c61087263dc4ce5a6c7485d821b8c1d0428baa2 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-kops-coredns.libsonnet @@ -0,0 +1,13 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+:: { + kubeDnsPrometheusDiscoveryService: + service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-kops.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-kops.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..556fa85639a83f44f916f9ff6b74eaadb41d8fa2 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-kops.libsonnet @@ -0,0 +1,23 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+:: { + kubeControllerManagerPrometheusDiscoveryService: + service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + + service.mixin.spec.withClusterIp('None'), + kubeSchedulerPrometheusDiscoveryService: + service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + + service.mixin.spec.withClusterIp('None'), + kubeDnsPrometheusDiscoveryService: + service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-ksonnet.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-ksonnet.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..664e19125b72419a58e79bf108fe981d6ae1826a --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-ksonnet.libsonnet @@ -0,0 +1,8 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet'); + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/jsonnet/kube-prometheus/kube-prometheus-kube-aws.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-kube-aws.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..8a69d2156010f5070049ed7bd1d1d984affc19c3 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-kube-aws.libsonnet @@ -0,0 +1,18 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+: { + kubeControllerManagerPrometheusDiscoveryService: + service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + + service.mixin.spec.withClusterIp('None'), + kubeSchedulerPrometheusDiscoveryService: + service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-kubeadm.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-kubeadm.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..a249d1dbfc77984e7892fb7caf4b631f00c5ce6f --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-kubeadm.libsonnet @@ -0,0 +1,18 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+: { + kubeControllerManagerPrometheusDiscoveryService: + service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + + service.mixin.spec.withClusterIp('None'), + kubeSchedulerPrometheusDiscoveryService: + service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-kubespray.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-kubespray.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..8a69d2156010f5070049ed7bd1d1d984affc19c3 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-kubespray.libsonnet @@ -0,0 +1,18 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+: { + kubeControllerManagerPrometheusDiscoveryService: + service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + + service.mixin.spec.withClusterIp('None'), + kubeSchedulerPrometheusDiscoveryService: + service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-managed-cluster.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-managed-cluster.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..442c0261cac1d6396b94758b4770d151fff167b9 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-managed-cluster.libsonnet @@ -0,0 +1,28 @@ +// On managed Kubernetes clusters some of the control plane components are not exposed to customers. +// Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults +// Note this doesn't disable generation of associated alerting rules but the rules don't trigger + +{ + _config+:: { + // This snippet walks the original object (super.jobs, set as temp var j) and creates a replacement jobs object + // excluding any members of the set specified (eg: controller and scheduler). + local j = super.jobs, + jobs: { + [k]: j[k] + for k in std.objectFields(j) + if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler']) + }, + }, + + // Same as above but for ServiceMonitor's + local p = super.prometheus, + prometheus: { + [q]: p[q] + for q in std.objectFields(p) + if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler']) + }, + + // TODO: disable generationg of alerting rules + // manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules + +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-node-ports.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-node-ports.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..48df7478aeb2db4e0c39d797aa3b68eafe599769 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-node-ports.libsonnet @@ -0,0 +1,21 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + prometheus+: { + service+: + service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) + + service.mixin.spec.withType('NodePort'), + }, + alertmanager+: { + service+: + service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) + + service.mixin.spec.withType('NodePort'), + }, + grafana+: { + service+: + service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) + + service.mixin.spec.withType('NodePort'), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..573e809c7b3eedc5b65737f5f0f629c8564f82f7 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet @@ -0,0 +1,99 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +(import 'etcd-mixin/mixin.libsonnet') + { + _config+:: { + etcd: { + ips: [], + clientCA: null, + clientKey: null, + clientCert: null, + serverName: null, + insecureSkipVerify: null, + }, + }, + prometheus+:: { + serviceEtcd: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379); + + service.new('etcd', null, etcdServicePort) + + service.mixin.metadata.withNamespace('kube-system') + + service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + + service.mixin.spec.withClusterIp('None'), + endpointsEtcd: + local endpoints = k.core.v1.endpoints; + local endpointSubset = endpoints.subsetsType; + local endpointPort = endpointSubset.portsType; + + local etcdPort = endpointPort.new() + + endpointPort.withName('metrics') + + endpointPort.withPort(2379) + + endpointPort.withProtocol('TCP'); + + local subset = endpointSubset.new() + + endpointSubset.withAddresses([ + { ip: etcdIP } + for etcdIP in $._config.etcd.ips + ]) + + endpointSubset.withPorts(etcdPort); + + endpoints.new() + + endpoints.mixin.metadata.withName('etcd') + + endpoints.mixin.metadata.withNamespace('kube-system') + + endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + + endpoints.withSubsets(subset), + serviceMonitorEtcd: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'etcd', + namespace: 'kube-system', + labels: { + 'k8s-app': 'etcd', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'metrics', + interval: '30s', + scheme: 'https', + // Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required as most likely your etcd metrics end points is secure. + tlsConfig: { + caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt', + keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key', + certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt', + [if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName, + [if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify, + }, + }, + ], + selector: { + matchLabels: { + 'k8s-app': 'etcd', + }, + }, + }, + }, + secretEtcdCerts: + // Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod. + local secret = k.core.v1.secret; + secret.new('kube-etcd-client-certs', { + 'etcd-client-ca.crt': std.base64($._config.etcd.clientCA), + 'etcd-client.key': std.base64($._config.etcd.clientKey), + 'etcd-client.crt': std.base64($._config.etcd.clientCert), + }) + + secret.mixin.metadata.withNamespace($._config.namespace), + prometheus+: + { + // Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec + spec+: { + secrets+: [$.prometheus.secretEtcdCerts.metadata.name], + }, + }, + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus-thanos.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-thanos.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..4b20b814a58ba415931c1a3a52437ad6902f9675 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus-thanos.libsonnet @@ -0,0 +1,219 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local service = k.core.v1.service; +local servicePort = k.core.v1.service.mixin.spec.portsType; + +{ + _config+:: { + versions+:: { + thanos: 'v0.3.2', + }, + imageRepos+:: { + thanos: 'improbable/thanos', + }, + thanos+:: { + objectStorageConfig: { + key: 'thanos.yaml', // How the file inside the secret is called + name: 'thanos-objstore-config', // This is the name of your Kubernetes secret with the config + }, + }, + }, + prometheus+:: { + prometheus+: { + spec+: { + podMetadata+: { + labels+: { 'thanos-peers': 'true' }, + }, + thanos+: { + peers: 'thanos-peers.' + $._config.namespace + '.svc:10900', + version: $._config.versions.thanos, + baseImage: $._config.imageRepos.thanos, + objectStorageConfig: $._config.thanos.objectStorageConfig, + }, + }, + }, + thanosPeerService: + service.new('thanos-peers', { 'thanos-peers': 'true' }, [ + servicePort.newNamed('cluster', 10900, 'cluster'), + servicePort.newNamed('http', 10902, 'http'), + ]) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ 'thanos-peers': 'true' }) + + service.mixin.spec.withType('ClusterIP') + + service.mixin.spec.withClusterIp('None'), + + serviceMonitorThanosPeer: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'thanos-peers', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'thanos-peers', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'http', + interval: '30s', + }, + ], + selector: { + matchLabels: { + 'thanos-peers': 'true', + }, + }, + }, + }, + thanosQueryDeployment: + local deployment = k.apps.v1beta2.deployment; + local container = k.apps.v1beta2.deployment.mixin.spec.template.spec.containersType; + local containerPort = container.portsType; + + local thanosQueryContainer = + container.new('thanos-query', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) + + container.withPorts([ + containerPort.newNamed('http', 10902), + containerPort.newNamed('grpc', 10901), + containerPort.newNamed('cluster', 10900), + ]) + + container.withArgs([ + 'query', + '--log.level=debug', + '--query.replica-label=prometheus_replica', + '--query.auto-downsampling', + '--cluster.peers=thanos-peers.' + $._config.namespace + '.svc:10900', + ]); + local podLabels = { app: 'thanos-query', 'thanos-peers': 'true' }; + deployment.new('thanos-query', 1, thanosQueryContainer, podLabels) + + deployment.mixin.metadata.withNamespace($._config.namespace) + + deployment.mixin.metadata.withLabels(podLabels) + + deployment.mixin.spec.selector.withMatchLabels(podLabels) + + deployment.mixin.spec.template.spec.withServiceAccountName('prometheus-' + $._config.prometheus.name), + thanosQueryService: + local thanosQueryPort = servicePort.newNamed('http-query', 9090, 'http'); + service.new('thanos-query', { app: 'thanos-query' }, thanosQueryPort) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ app: 'thanos-query' }), + + thanosStoreStatefulset: + local statefulSet = k.apps.v1beta2.statefulSet; + local volume = statefulSet.mixin.spec.template.spec.volumesType; + local container = statefulSet.mixin.spec.template.spec.containersType; + local containerEnv = container.envType; + local containerVolumeMount = container.volumeMountsType; + + local labels = { app: 'thanos', 'thanos-peers': 'true' }; + + local c = + container.new('thanos-store', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) + + container.withArgs([ + 'store', + '--log.level=debug', + '--data-dir=/var/thanos/store', + '--cluster.peers=thanos-peers.' + $._config.namespace + '.svc:10900', + '--objstore.config=$(OBJSTORE_CONFIG)', + ]) + + container.withEnv([ + containerEnv.fromSecretRef( + 'OBJSTORE_CONFIG', + $._config.thanos.objectStorageConfig.name, + $._config.thanos.objectStorageConfig.key, + ), + ]) + + container.withPorts([ + { name: 'cluster', containerPort: 10900 }, + { name: 'grpc', containerPort: 10901 }, + { name: 'http', containerPort: 10902 }, + ]) + + container.withVolumeMounts([ + containerVolumeMount.new('data', '/var/thanos/store', false), + ]); + + statefulSet.new('thanos-store', 1, c, [], labels) + + statefulSet.mixin.metadata.withNamespace($._config.namespace) + + statefulSet.mixin.spec.selector.withMatchLabels(labels) + + statefulSet.mixin.spec.withServiceName('thanos-store') + + statefulSet.mixin.spec.template.spec.withVolumes([ + volume.fromEmptyDir('data'), + ]), + + serviceMonitorThanosCompactor: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'thanos-compactor', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'thanos-compactor', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'http', + interval: '30s', + }, + ], + selector: { + matchLabels: { + app: 'thanos-compactor', + }, + }, + }, + }, + + thanosCompactorService: + service.new( + 'thanos-compactor', + { app: 'thanos-compactor' }, + servicePort.newNamed('http', 9090, 'http'), + ) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ app: 'thanos-compactor' }), + + thanosCompactorStatefulset: + local statefulSet = k.apps.v1beta2.statefulSet; + local volume = statefulSet.mixin.spec.template.spec.volumesType; + local container = statefulSet.mixin.spec.template.spec.containersType; + local containerEnv = container.envType; + local containerVolumeMount = container.volumeMountsType; + + local labels = { app: 'thanos-compactor' }; + + local c = + container.new('thanos-compactor', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) + + container.withArgs([ + 'compact', + '--log.level=debug', + '--data-dir=/var/thanos/store', + '--objstore.config=$(OBJSTORE_CONFIG)', + '--wait', + ]) + + container.withEnv([ + containerEnv.fromSecretRef( + 'OBJSTORE_CONFIG', + $._config.thanos.objectStorageConfig.name, + $._config.thanos.objectStorageConfig.key, + ), + ]) + + container.withPorts([ + { name: 'http', containerPort: 10902 }, + ]) + + container.withVolumeMounts([ + containerVolumeMount.new('data', '/var/thanos/store', false), + ]); + + statefulSet.new('thanos-compactor', 1, c, [], labels) + + statefulSet.mixin.metadata.withNamespace($._config.namespace) + + statefulSet.mixin.spec.selector.withMatchLabels(labels) + + statefulSet.mixin.spec.withServiceName('thanos-compactor') + + statefulSet.mixin.spec.template.spec.withVolumes([ + volume.fromEmptyDir('data'), + ]), + }, +} diff --git a/jsonnet/kube-prometheus/kube-prometheus.libsonnet b/jsonnet/kube-prometheus/kube-prometheus.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..f51ae49ac1aad5a2bb004c83ed6aff2e28bb9d3d --- /dev/null +++ b/jsonnet/kube-prometheus/kube-prometheus.libsonnet @@ -0,0 +1,114 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; +local configMapList = k.core.v1.configMapList; + +(import 'grafana/grafana.libsonnet') + +(import 'kube-state-metrics/kube-state-metrics.libsonnet') + +(import 'node-exporter/node-exporter.libsonnet') + +(import 'alertmanager/alertmanager.libsonnet') + +(import 'prometheus-operator/prometheus-operator.libsonnet') + +(import 'prometheus/prometheus.libsonnet') + +(import 'prometheus-adapter/prometheus-adapter.libsonnet') + +(import 'kubernetes-mixin/mixin.libsonnet') + +(import 'alerts/alerts.libsonnet') + +(import 'rules/rules.libsonnet') + { + kubePrometheus+:: { + namespace: k.core.v1.namespace.new($._config.namespace), + }, + grafana+:: { + dashboardDefinitions: configMapList.new(super.dashboardDefinitions), + serviceMonitor: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'grafana', + namespace: $._config.namespace, + }, + spec: { + selector: { + matchLabels: { + app: 'grafana', + }, + }, + endpoints: [ + { + port: 'http', + interval: '15s', + }, + ], + }, + }, + }, +} + { + _config+:: { + namespace: 'default', + + versions+:: { + grafana: '6.0.1', + }, + + tlsCipherSuites: [ + 'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721 + 'TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721 + + // 'TLS_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 + // 'TLS_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661 + // 'TLS_RSA_WITH_AES_128_CBC_SHA', // disabled by h2 + // 'TLS_RSA_WITH_AES_256_CBC_SHA', // disabled by h2 + 'TLS_RSA_WITH_AES_128_CBC_SHA256', + // 'TLS_RSA_WITH_AES_128_GCM_SHA256', // disabled by h2 + // 'TLS_RSA_WITH_AES_256_GCM_SHA384', // disabled by h2 + // 'TLS_ECDHE_ECDSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 + // 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA',// disabled by h2 + // 'TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA',// disabled by h2 + // 'TLS_ECDHE_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 + // 'TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661 + // 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA', // disabled by h2 + // 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA', // disabled by h2 + 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256', + 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256', + + // disabled by h2 means: https://github.com/golang/net/blob/e514e69ffb8bc3c76a71ae40de0118d794855992/http2/ciphers.go + + // 'TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384', // TODO: Might not work with h2 + // 'TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384', // TODO: Might not work with h2 + // 'TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2 + // 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2 + ], + + cadvisorSelector: 'job="kubelet"', + kubeletSelector: 'job="kubelet"', + kubeStateMetricsSelector: 'job="kube-state-metrics"', + nodeExporterSelector: 'job="node-exporter"', + notKubeDnsSelector: 'job!="kube-dns"', + kubeSchedulerSelector: 'job="kube-scheduler"', + kubeControllerManagerSelector: 'job="kube-controller-manager"', + kubeApiserverSelector: 'job="apiserver"', + coreDNSSelector: 'job="kube-dns"', + podLabel: 'pod', + + alertmanagerSelector: 'job="alertmanager-main",namespace="' + $._config.namespace + '"', + prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"', + prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"', + + jobs: { + Kubelet: $._config.kubeletSelector, + KubeScheduler: $._config.kubeSchedulerSelector, + KubeControllerManager: $._config.kubeControllerManagerSelector, + KubeAPI: $._config.kubeApiserverSelector, + KubeStateMetrics: $._config.kubeStateMetricsSelector, + NodeExporter: $._config.nodeExporterSelector, + Alertmanager: $._config.alertmanagerSelector, + Prometheus: $._config.prometheusSelector, + PrometheusOperator: $._config.prometheusOperatorSelector, + CoreDNS: $._config.coreDNSSelector, + }, + + prometheus+:: { + rules: $.prometheusRules + $.prometheusAlerts, + }, + + grafana+:: { + dashboards: $.grafanaDashboards, + }, + }, +} diff --git a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..5172fb9427c37fcd9af4ba4a2ecac848cfe85212 --- /dev/null +++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet @@ -0,0 +1,315 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +{ + _config+:: { + namespace: 'default', + + kubeStateMetrics+:: { + collectors: '', // empty string gets a default set + scrapeInterval: '30s', + scrapeTimeout: '30s', + + baseCPU: '100m', + baseMemory: '150Mi', + cpuPerNode: '2m', + memoryPerNode: '30Mi', + }, + + versions+:: { + kubeStateMetrics: 'v1.5.0', + kubeRbacProxy: 'v0.4.1', + addonResizer: '1.8.4', + }, + + imageRepos+:: { + kubeStateMetrics: 'quay.io/coreos/kube-state-metrics', + kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy', + addonResizer: 'k8s.gcr.io/addon-resizer', + }, + }, + + kubeStateMetrics+:: { + clusterRoleBinding: + local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; + + clusterRoleBinding.new() + + clusterRoleBinding.mixin.metadata.withName('kube-state-metrics') + + clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + clusterRoleBinding.mixin.roleRef.withName('kube-state-metrics') + + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + + clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'kube-state-metrics', namespace: $._config.namespace }]), + + clusterRole: + local clusterRole = k.rbac.v1.clusterRole; + local rulesType = clusterRole.rulesType; + + local coreRule = rulesType.new() + + rulesType.withApiGroups(['']) + + rulesType.withResources([ + 'configmaps', + 'secrets', + 'nodes', + 'pods', + 'services', + 'resourcequotas', + 'replicationcontrollers', + 'limitranges', + 'persistentvolumeclaims', + 'persistentvolumes', + 'namespaces', + 'endpoints', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local extensionsRule = rulesType.new() + + rulesType.withApiGroups(['extensions']) + + rulesType.withResources([ + 'daemonsets', + 'deployments', + 'replicasets', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local appsRule = rulesType.new() + + rulesType.withApiGroups(['apps']) + + rulesType.withResources([ + 'statefulsets', + 'daemonsets', + 'deployments', + 'replicasets', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local batchRule = rulesType.new() + + rulesType.withApiGroups(['batch']) + + rulesType.withResources([ + 'cronjobs', + 'jobs', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local autoscalingRule = rulesType.new() + + rulesType.withApiGroups(['autoscaling']) + + rulesType.withResources([ + 'horizontalpodautoscalers', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local authenticationRole = rulesType.new() + + rulesType.withApiGroups(['authentication.k8s.io']) + + rulesType.withResources([ + 'tokenreviews', + ]) + + rulesType.withVerbs(['create']); + + local authorizationRole = rulesType.new() + + rulesType.withApiGroups(['authorization.k8s.io']) + + rulesType.withResources([ + 'subjectaccessreviews', + ]) + + rulesType.withVerbs(['create']); + + local policyRule = rulesType.new() + + rulesType.withApiGroups(['policy']) + + rulesType.withResources([ + 'poddisruptionbudgets', + ]) + + rulesType.withVerbs(['list', 'watch']); + + local rules = [coreRule, extensionsRule, appsRule, batchRule, autoscalingRule, authenticationRole, authorizationRole, policyRule]; + + clusterRole.new() + + clusterRole.mixin.metadata.withName('kube-state-metrics') + + clusterRole.withRules(rules), + deployment: + local deployment = k.apps.v1beta2.deployment; + local container = k.apps.v1beta2.deployment.mixin.spec.template.spec.containersType; + local volume = k.apps.v1beta2.deployment.mixin.spec.template.spec.volumesType; + local containerPort = container.portsType; + local containerVolumeMount = container.volumeMountsType; + local podSelector = deployment.mixin.spec.template.spec.selectorType; + + local podLabels = { app: 'kube-state-metrics' }; + + local proxyClusterMetrics = + container.new('kube-rbac-proxy-main', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) + + container.withArgs([ + '--logtostderr', + '--secure-listen-address=:8443', + '--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites), + '--upstream=http://127.0.0.1:8081/', + ]) + + container.withPorts(containerPort.newNamed('https-main', 8443)) + + container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) + + container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' }); + + local proxySelfMetrics = + container.new('kube-rbac-proxy-self', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) + + container.withArgs([ + '--logtostderr', + '--secure-listen-address=:9443', + '--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites), + '--upstream=http://127.0.0.1:8082/', + ]) + + container.withPorts(containerPort.newNamed('https-self', 9443)) + + container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) + + container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' }); + + local kubeStateMetrics = + container.new('kube-state-metrics', $._config.imageRepos.kubeStateMetrics + ':' + $._config.versions.kubeStateMetrics) + + container.withArgs([ + '--host=127.0.0.1', + '--port=8081', + '--telemetry-host=127.0.0.1', + '--telemetry-port=8082', + ] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) + + container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) + + container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }); + + local addonResizer = + container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) + + container.withCommand([ + '/pod_nanny', + '--container=kube-state-metrics', + '--cpu=' + $._config.kubeStateMetrics.baseCPU, + '--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode, + '--memory=' + $._config.kubeStateMetrics.baseMemory, + '--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode, + '--threshold=5', + '--deployment=kube-state-metrics', + ]) + + container.withEnv([ + { + name: 'MY_POD_NAME', + valueFrom: { + fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.name' }, + }, + }, + { + name: 'MY_POD_NAMESPACE', + valueFrom: { + fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.namespace' }, + }, + }, + ]) + + container.mixin.resources.withRequests({ cpu: '10m', memory: '30Mi' }) + + container.mixin.resources.withLimits({ cpu: '50m', memory: '30Mi' }); + + local c = [proxyClusterMetrics, proxySelfMetrics, kubeStateMetrics, addonResizer]; + + deployment.new('kube-state-metrics', 1, c, podLabels) + + deployment.mixin.metadata.withNamespace($._config.namespace) + + deployment.mixin.metadata.withLabels(podLabels) + + deployment.mixin.spec.selector.withMatchLabels(podLabels) + + deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) + + deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) + + deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) + + deployment.mixin.spec.template.spec.withServiceAccountName('kube-state-metrics'), + + roleBinding: + local roleBinding = k.rbac.v1.roleBinding; + + roleBinding.new() + + roleBinding.mixin.metadata.withName('kube-state-metrics') + + roleBinding.mixin.metadata.withNamespace($._config.namespace) + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withName('kube-state-metrics') + + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + + roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'kube-state-metrics' }]), + + role: + local role = k.rbac.v1.role; + local rulesType = role.rulesType; + + local coreRule = rulesType.new() + + rulesType.withApiGroups(['']) + + rulesType.withResources([ + 'pods', + ]) + + rulesType.withVerbs(['get']); + + local extensionsRule = rulesType.new() + + rulesType.withApiGroups(['extensions']) + + rulesType.withResources([ + 'deployments', + ]) + + rulesType.withVerbs(['get', 'update']) + + rulesType.withResourceNames(['kube-state-metrics']); + + local appsRule = rulesType.new() + + rulesType.withApiGroups(['apps']) + + rulesType.withResources([ + 'deployments', + ]) + + rulesType.withVerbs(['get', 'update']) + + rulesType.withResourceNames(['kube-state-metrics']); + + local rules = [coreRule, extensionsRule, appsRule]; + + role.new() + + role.mixin.metadata.withName('kube-state-metrics') + + role.mixin.metadata.withNamespace($._config.namespace) + + role.withRules(rules), + + serviceAccount: + local serviceAccount = k.core.v1.serviceAccount; + + serviceAccount.new('kube-state-metrics') + + serviceAccount.mixin.metadata.withNamespace($._config.namespace), + + service: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + local ksmServicePortMain = servicePort.newNamed('https-main', 8443, 'https-main'); + local ksmServicePortSelf = servicePort.newNamed('https-self', 9443, 'https-self'); + + service.new('kube-state-metrics', $.kubeStateMetrics.deployment.spec.selector.matchLabels, [ksmServicePortMain, ksmServicePortSelf]) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ 'k8s-app': 'kube-state-metrics' }) + + service.mixin.spec.withClusterIp('None'), + + serviceMonitor: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'kube-state-metrics', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'kube-state-metrics', + }, + }, + spec: { + jobLabel: 'k8s-app', + selector: { + matchLabels: { + 'k8s-app': 'kube-state-metrics', + }, + }, + endpoints: [ + { + port: 'https-main', + scheme: 'https', + interval: $._config.kubeStateMetrics.scrapeInterval, + scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout, + honorLabels: true, + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + tlsConfig: { + insecureSkipVerify: true, + }, + }, + { + port: 'https-self', + scheme: 'https', + interval: '30s', + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + tlsConfig: { + insecureSkipVerify: true, + }, + }, + ], + }, + }, + }, +} diff --git a/jsonnet/kube-prometheus/lib/image.libsonnet b/jsonnet/kube-prometheus/lib/image.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..0561e33c820c207d8a7dd1da2b31bdd9350afd14 --- /dev/null +++ b/jsonnet/kube-prometheus/lib/image.libsonnet @@ -0,0 +1,21 @@ +// imageName extracts the image name from a fully qualified image string. eg. +// quay.io/coreos/addon-resizer -> addon-resizer +// grafana/grafana -> grafana +local imageName(image) = + local parts = std.split(image, '/'); + local len = std.length(parts); + if len == 3 then + # registry.com/org/image + parts[2] + else if len == 2 then + # org/image + parts[1] + else if len == 1 then + # image, ie. busybox + parts[0] + else + error 'unknown image format: ' + image; + +{ + imageName:: imageName, +} diff --git a/jsonnet/kube-prometheus/lib/lib.libsonnet b/jsonnet/kube-prometheus/lib/lib.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..c30f976f15a4367a4361c72306aa579e5580f6fe --- /dev/null +++ b/jsonnet/kube-prometheus/lib/lib.libsonnet @@ -0,0 +1 @@ +(import 'image.libsonnet') diff --git a/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..8aae5b86dda47add15145add3eae37e027fb0c91 --- /dev/null +++ b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet @@ -0,0 +1,199 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +{ + _config+:: { + namespace: 'default', + + versions+:: { + nodeExporter: 'v0.17.0', + kubeRbacProxy: 'v0.4.1', + }, + + imageRepos+:: { + nodeExporter: 'quay.io/prometheus/node-exporter', + kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy', + }, + + nodeExporter+:: { + port: 9100, + }, + }, + + nodeExporter+:: { + clusterRoleBinding: + local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; + + clusterRoleBinding.new() + + clusterRoleBinding.mixin.metadata.withName('node-exporter') + + clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + clusterRoleBinding.mixin.roleRef.withName('node-exporter') + + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + + clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'node-exporter', namespace: $._config.namespace }]), + + clusterRole: + local clusterRole = k.rbac.v1.clusterRole; + local policyRule = clusterRole.rulesType; + + local authenticationRole = policyRule.new() + + policyRule.withApiGroups(['authentication.k8s.io']) + + policyRule.withResources([ + 'tokenreviews', + ]) + + policyRule.withVerbs(['create']); + + local authorizationRole = policyRule.new() + + policyRule.withApiGroups(['authorization.k8s.io']) + + policyRule.withResources([ + 'subjectaccessreviews', + ]) + + policyRule.withVerbs(['create']); + + local rules = [authenticationRole, authorizationRole]; + + clusterRole.new() + + clusterRole.mixin.metadata.withName('node-exporter') + + clusterRole.withRules(rules), + + daemonset: + local daemonset = k.apps.v1beta2.daemonSet; + local container = daemonset.mixin.spec.template.spec.containersType; + local volume = daemonset.mixin.spec.template.spec.volumesType; + local containerPort = container.portsType; + local containerVolumeMount = container.volumeMountsType; + local podSelector = daemonset.mixin.spec.template.spec.selectorType; + local toleration = daemonset.mixin.spec.template.spec.tolerationsType; + local containerEnv = container.envType; + + local podLabels = { app: 'node-exporter' }; + + local noExecuteToleration = toleration.new() + + toleration.withOperator('Exists') + + toleration.withEffect('NoExecute'); + + local noScheduleToleration = toleration.new() + + toleration.withOperator('Exists') + + toleration.withEffect('NoSchedule'); + + local procVolumeName = 'proc'; + local procVolume = volume.fromHostPath(procVolumeName, '/proc'); + local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc'); + + local sysVolumeName = 'sys'; + local sysVolume = volume.fromHostPath(sysVolumeName, '/sys'); + local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys'); + + local rootVolumeName = 'root'; + local rootVolume = volume.fromHostPath(rootVolumeName, '/'); + local rootVolumeMount = containerVolumeMount.new(rootVolumeName, '/host/root'). + withMountPropagation('HostToContainer'). + withReadOnly(true); + + local nodeExporter = + container.new('node-exporter', $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter) + + container.withArgs([ + '--web.listen-address=127.0.0.1:' + $._config.nodeExporter.port, + '--path.procfs=/host/proc', + '--path.sysfs=/host/sys', + '--path.rootfs=/host/root', + + // The following settings have been taken from + // https://github.com/prometheus/node_exporter/blob/0662673/collector/filesystem_linux.go#L30-L31 + // Once node exporter is being released with those settings, this can be removed. + '--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)', + '--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$', + '--collector.ntp', + ]) + + container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) + + container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) + + container.mixin.resources.withLimits({ cpu: '250m', memory: '180Mi' }); + + local ip = containerEnv.fromFieldPath('IP', 'status.podIP'); + local proxy = + container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) + + container.withArgs([ + '--logtostderr', + '--secure-listen-address=$(IP):' + $._config.nodeExporter.port, + '--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites), + '--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/', + ]) + + // Keep `hostPort` here, rather than in the node-exporter container + // because Kubernetes mandates that if you define a `hostPort` then + // `containerPort` must match. In our case, we are splitting the + // host port and container port between the two containers. + // We'll keep the port specification here so that the named port + // used by the service is tied to the proxy container. We *could* + // forgo declaring the host port, however it is important to declare + // it so that the scheduler can decide if the pod is schedulable. + container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) + + container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) + + container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' }) + + container.withEnv([ip]); + + local c = [nodeExporter, proxy]; + + daemonset.new() + + daemonset.mixin.metadata.withName('node-exporter') + + daemonset.mixin.metadata.withNamespace($._config.namespace) + + daemonset.mixin.metadata.withLabels(podLabels) + + daemonset.mixin.spec.selector.withMatchLabels(podLabels) + + daemonset.mixin.spec.template.metadata.withLabels(podLabels) + + daemonset.mixin.spec.template.spec.withTolerations([noExecuteToleration, noScheduleToleration]) + + daemonset.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) + + daemonset.mixin.spec.template.spec.withContainers(c) + + daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) + + daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) + + daemonset.mixin.spec.template.spec.securityContext.withRunAsUser(65534) + + daemonset.mixin.spec.template.spec.withServiceAccountName('node-exporter') + + daemonset.mixin.spec.template.spec.withHostPid(true) + + daemonset.mixin.spec.template.spec.withHostNetwork(true), + + serviceAccount: + local serviceAccount = k.core.v1.serviceAccount; + + serviceAccount.new('node-exporter') + + serviceAccount.mixin.metadata.withNamespace($._config.namespace), + + serviceMonitor: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'node-exporter', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'node-exporter', + }, + }, + spec: { + jobLabel: 'k8s-app', + selector: { + matchLabels: { + 'k8s-app': 'node-exporter', + }, + }, + endpoints: [ + { + port: 'https', + scheme: 'https', + interval: '30s', + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + tlsConfig: { + insecureSkipVerify: true, + }, + }, + ], + }, + }, + + service: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https'); + + service.new('node-exporter', $.nodeExporter.daemonset.spec.selector.matchLabels, nodeExporterPort) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ 'k8s-app': 'node-exporter' }) + + service.mixin.spec.withClusterIp('None'), + }, +} diff --git a/jsonnet/kube-prometheus/prometheus-adapter/prometheus-adapter.libsonnet b/jsonnet/kube-prometheus/prometheus-adapter/prometheus-adapter.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..6d6604bc2ef7f11e30cfbccc9f40f7fa7b52bf69 --- /dev/null +++ b/jsonnet/kube-prometheus/prometheus-adapter/prometheus-adapter.libsonnet @@ -0,0 +1,202 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +{ + _config+:: { + namespace: 'default', + + versions+:: { + prometheusAdapter: 'v0.4.1', + }, + + imageRepos+:: { + prometheusAdapter: 'quay.io/coreos/k8s-prometheus-adapter-amd64', + }, + + prometheusAdapter+:: { + name: 'prometheus-adapter', + labels: { name: $._config.prometheusAdapter.name }, + prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc:9090/', + config: ||| + resourceRules: + cpu: + containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>) + nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + memory: + containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>) + nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + window: 1m + |||, + }, + }, + + prometheusAdapter+:: { + apiService: + { + apiVersion: 'apiregistration.k8s.io/v1', + kind: 'APIService', + metadata: { + name: 'v1beta1.metrics.k8s.io', + }, + spec: { + service: { + name: $.prometheusAdapter.service.metadata.name, + namespace: $._config.namespace, + }, + group: 'metrics.k8s.io', + version: 'v1beta1', + insecureSkipTLSVerify: true, + groupPriorityMinimum: 100, + versionPriority: 100, + }, + }, + + configMap: + local configmap = k.core.v1.configMap; + + configmap.new('adapter-config', { 'config.yaml': $._config.prometheusAdapter.config }) + + configmap.mixin.metadata.withNamespace($._config.namespace), + + service: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + service.new( + $._config.prometheusAdapter.name, + $._config.prometheusAdapter.labels, + servicePort.newNamed('https', 443, 6443), + ) + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels($._config.prometheusAdapter.labels), + + deployment: + local deployment = k.apps.v1beta2.deployment; + local volume = deployment.mixin.spec.template.spec.volumesType; + local container = deployment.mixin.spec.template.spec.containersType; + local containerVolumeMount = container.volumeMountsType; + + local c = + container.new($._config.prometheusAdapter.name, $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter) + + container.withArgs([ + '--cert-dir=/var/run/serving-cert', + '--config=/etc/adapter/config.yaml', + '--logtostderr=true', + '--metrics-relist-interval=1m', + '--prometheus-url=' + $._config.prometheusAdapter.prometheusURL, + '--secure-port=6443', + ]) + + container.withPorts([{ containerPort: 6443 }]) + + container.withVolumeMounts([ + containerVolumeMount.new('tmpfs', '/tmp'), + containerVolumeMount.new('volume-serving-cert', '/var/run/serving-cert'), + containerVolumeMount.new('config', '/etc/adapter'), + ],); + + deployment.new($._config.prometheusAdapter.name, 1, c, $._config.prometheusAdapter.labels) + + deployment.mixin.metadata.withNamespace($._config.namespace) + + deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) + + deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) + + deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) + + deployment.mixin.spec.template.spec.withVolumes([ + volume.fromEmptyDir(name='tmpfs'), + volume.fromEmptyDir(name='volume-serving-cert'), + { name: 'config', configMap: { name: 'adapter-config' } }, + ]), + + serviceAccount: + local serviceAccount = k.core.v1.serviceAccount; + + serviceAccount.new($._config.prometheusAdapter.name) + + serviceAccount.mixin.metadata.withNamespace($._config.namespace), + + clusterRole: + local clusterRole = k.rbac.v1.clusterRole; + local policyRule = clusterRole.rulesType; + + local rules = + policyRule.new() + + policyRule.withApiGroups(['']) + + policyRule.withResources(['nodes', 'namespaces', 'pods', 'services']) + + policyRule.withVerbs(['get', 'list', 'watch']); + + clusterRole.new() + + clusterRole.mixin.metadata.withName($._config.prometheusAdapter.name) + + clusterRole.withRules(rules), + + clusterRoleBinding: + local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; + + clusterRoleBinding.new() + + clusterRoleBinding.mixin.metadata.withName($._config.prometheusAdapter.name) + + clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + clusterRoleBinding.mixin.roleRef.withName($.prometheusAdapter.clusterRole.metadata.name) + + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + + clusterRoleBinding.withSubjects([{ + kind: 'ServiceAccount', + name: $.prometheusAdapter.serviceAccount.metadata.name, + namespace: $._config.namespace, + }]), + + clusterRoleBindingDelegator: + local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; + + clusterRoleBinding.new() + + clusterRoleBinding.mixin.metadata.withName('resource-metrics:system:auth-delegator') + + clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + clusterRoleBinding.mixin.roleRef.withName('system:auth-delegator') + + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + + clusterRoleBinding.withSubjects([{ + kind: 'ServiceAccount', + name: $.prometheusAdapter.serviceAccount.metadata.name, + namespace: $._config.namespace, + }]), + + clusterRoleServerResources: + local clusterRole = k.rbac.v1.clusterRole; + local policyRule = clusterRole.rulesType; + + local rules = + policyRule.new() + + policyRule.withApiGroups(['metrics.k8s.io']) + + policyRule.withResources(['*']) + + policyRule.withVerbs(['*']); + + clusterRole.new() + + clusterRole.mixin.metadata.withName('resource-metrics-server-resources') + + clusterRole.withRules(rules), + + roleBindingAuthReader: + local roleBinding = k.rbac.v1.roleBinding; + + roleBinding.new() + + roleBinding.mixin.metadata.withName('resource-metrics-auth-reader') + + roleBinding.mixin.metadata.withNamespace('kube-system') + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withName('extension-apiserver-authentication-reader') + + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + + roleBinding.withSubjects([{ + kind: 'ServiceAccount', + name: $.prometheusAdapter.serviceAccount.metadata.name, + namespace: $._config.namespace, + }]), + }, +} diff --git a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..57c541095cee761671e942f2d2add5286dd02b3d --- /dev/null +++ b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet @@ -0,0 +1,434 @@ +local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; + +{ + _config+:: { + namespace: 'default', + + versions+:: { + prometheus: 'v2.7.2', + }, + + imageRepos+:: { + prometheus: 'quay.io/prometheus/prometheus', + }, + + alertmanager+:: { + name: 'main', + }, + + prometheus+:: { + name: 'k8s', + replicas: 2, + rules: {}, + renderedRules: {}, + namespaces: ['default', 'kube-system', $._config.namespace], + }, + }, + + prometheus+:: { + serviceAccount: + local serviceAccount = k.core.v1.serviceAccount; + + serviceAccount.new('prometheus-' + $._config.prometheus.name) + + serviceAccount.mixin.metadata.withNamespace($._config.namespace), + service: + local service = k.core.v1.service; + local servicePort = k.core.v1.service.mixin.spec.portsType; + + local prometheusPort = servicePort.newNamed('web', 9090, 'web'); + + service.new('prometheus-' + $._config.prometheus.name, { app: 'prometheus', prometheus: $._config.prometheus.name }, prometheusPort) + + service.mixin.spec.withSessionAffinity('ClientIP') + + service.mixin.metadata.withNamespace($._config.namespace) + + service.mixin.metadata.withLabels({ prometheus: $._config.prometheus.name }), + [if $._config.prometheus.rules != null && $._config.prometheus.rules != {} then 'rules']: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'PrometheusRule', + metadata: { + labels: { + prometheus: $._config.prometheus.name, + role: 'alert-rules', + }, + name: 'prometheus-' + $._config.prometheus.name + '-rules', + namespace: $._config.namespace, + }, + spec: { + groups: $._config.prometheus.rules.groups, + }, + }, + roleBindingSpecificNamespaces: + local roleBinding = k.rbac.v1.roleBinding; + + local newSpecificRoleBinding(namespace) = + roleBinding.new() + + roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + roleBinding.mixin.metadata.withNamespace(namespace) + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + + roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]); + + local roleBindigList = k.rbac.v1.roleBindingList; + roleBindigList.new([newSpecificRoleBinding(x) for x in $._config.prometheus.namespaces]), + clusterRole: + local clusterRole = k.rbac.v1.clusterRole; + local policyRule = clusterRole.rulesType; + + local nodeMetricsRule = policyRule.new() + + policyRule.withApiGroups(['']) + + policyRule.withResources(['nodes/metrics']) + + policyRule.withVerbs(['get']); + + local metricsRule = policyRule.new() + + policyRule.withNonResourceUrls('/metrics') + + policyRule.withVerbs(['get']); + + local rules = [nodeMetricsRule, metricsRule]; + + clusterRole.new() + + clusterRole.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + clusterRole.withRules(rules), + roleConfig: + local role = k.rbac.v1.role; + local policyRule = role.rulesType; + + local configmapRule = policyRule.new() + + policyRule.withApiGroups(['']) + + policyRule.withResources([ + 'configmaps', + ]) + + policyRule.withVerbs(['get']); + + role.new() + + role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') + + role.mixin.metadata.withNamespace($._config.namespace) + + role.withRules(configmapRule), + roleBindingConfig: + local roleBinding = k.rbac.v1.roleBinding; + + roleBinding.new() + + roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') + + roleBinding.mixin.metadata.withNamespace($._config.namespace) + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name + '-config') + + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + + roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), + clusterRoleBinding: + local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; + + clusterRoleBinding.new() + + clusterRoleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + clusterRoleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + + clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), + roleSpecificNamespaces: + local role = k.rbac.v1.role; + local policyRule = role.rulesType; + local coreRule = policyRule.new() + + policyRule.withApiGroups(['']) + + policyRule.withResources([ + 'services', + 'endpoints', + 'pods', + ]) + + policyRule.withVerbs(['get', 'list', 'watch']); + + local newSpecificRole(namespace) = + role.new() + + role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + role.mixin.metadata.withNamespace(namespace) + + role.withRules(coreRule); + + local roleList = k.rbac.v1.roleList; + roleList.new([newSpecificRole(x) for x in $._config.prometheus.namespaces]), + prometheus: + local statefulSet = k.apps.v1beta2.statefulSet; + local container = statefulSet.mixin.spec.template.spec.containersType; + local resourceRequirements = container.mixin.resourcesType; + local selector = statefulSet.mixin.spec.selectorType; + + local resources = + resourceRequirements.new() + + resourceRequirements.withRequests({ memory: '400Mi' }); + + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'Prometheus', + metadata: { + name: $._config.prometheus.name, + namespace: $._config.namespace, + labels: { + prometheus: $._config.prometheus.name, + }, + }, + spec: { + replicas: $._config.prometheus.replicas, + version: $._config.versions.prometheus, + baseImage: $._config.imageRepos.prometheus, + serviceAccountName: 'prometheus-' + $._config.prometheus.name, + serviceMonitorSelector: {}, + serviceMonitorNamespaceSelector: {}, + nodeSelector: { 'beta.kubernetes.io/os': 'linux' }, + ruleSelector: selector.withMatchLabels({ + role: 'alert-rules', + prometheus: $._config.prometheus.name, + }), + resources: resources, + alerting: { + alertmanagers: [ + { + namespace: $._config.namespace, + name: 'alertmanager-' + $._config.alertmanager.name, + port: 'web', + }, + ], + }, + securityContext: { + runAsUser: 1000, + runAsNonRoot: true, + fsGroup: 2000, + }, + }, + }, + serviceMonitor: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'prometheus', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'prometheus', + }, + }, + spec: { + selector: { + matchLabels: { + prometheus: $._config.prometheus.name, + }, + }, + endpoints: [ + { + port: 'web', + interval: '30s', + }, + ], + }, + }, + serviceMonitorKubeScheduler: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'kube-scheduler', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'kube-scheduler', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'http-metrics', + interval: '30s', + }, + ], + selector: { + matchLabels: { + 'k8s-app': 'kube-scheduler', + }, + }, + namespaceSelector: { + matchNames: [ + 'kube-system', + ], + }, + }, + }, + serviceMonitorKubelet: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'kubelet', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'kubelet', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'https-metrics', + scheme: 'https', + interval: '30s', + honorLabels: true, + tlsConfig: { + insecureSkipVerify: true, + }, + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + }, + { + port: 'https-metrics', + scheme: 'https', + path: '/metrics/cadvisor', + interval: '30s', + honorLabels: true, + tlsConfig: { + insecureSkipVerify: true, + }, + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + metricRelabelings: [ + // Drop a bunch of metrics which are disabled but still sent, see + // https://github.com/google/cadvisor/issues/1925. + { + sourceLabels: ['__name__'], + regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)', + action: 'drop', + }, + ], + }, + ], + selector: { + matchLabels: { + 'k8s-app': 'kubelet', + }, + }, + namespaceSelector: { + matchNames: [ + 'kube-system', + ], + }, + }, + }, + serviceMonitorKubeControllerManager: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'kube-controller-manager', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'kube-controller-manager', + }, + }, + spec: { + jobLabel: 'k8s-app', + endpoints: [ + { + port: 'http-metrics', + interval: '30s', + metricRelabelings: [ + { + sourceLabels: ['__name__'], + regex: 'etcd_(debugging|disk|request|server).*', + action: 'drop', + }, + ], + }, + ], + selector: { + matchLabels: { + 'k8s-app': 'kube-controller-manager', + }, + }, + namespaceSelector: { + matchNames: [ + 'kube-system', + ], + }, + }, + }, + serviceMonitorApiserver: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'kube-apiserver', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'apiserver', + }, + }, + spec: { + jobLabel: 'component', + selector: { + matchLabels: { + component: 'apiserver', + provider: 'kubernetes', + }, + }, + namespaceSelector: { + matchNames: [ + 'default', + ], + }, + endpoints: [ + { + port: 'https', + interval: '30s', + scheme: 'https', + tlsConfig: { + caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt', + serverName: 'kubernetes', + }, + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + metricRelabelings: [ + { + sourceLabels: ['__name__'], + regex: 'etcd_(debugging|disk|request|server).*', + action: 'drop', + }, + { + sourceLabels: ['__name__'], + regex: 'apiserver_admission_controller_admission_latencies_seconds_.*', + action: 'drop', + }, + { + sourceLabels: ['__name__'], + regex: 'apiserver_admission_step_admission_latencies_seconds_.*', + action: 'drop', + }, + ], + }, + ], + }, + }, + serviceMonitorCoreDNS: + { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'coredns', + namespace: $._config.namespace, + labels: { + 'k8s-app': 'coredns', + }, + }, + spec: { + jobLabel: 'k8s-app', + selector: { + matchLabels: { + 'k8s-app': 'kube-dns', + }, + }, + namespaceSelector: { + matchNames: [ + 'kube-system', + ], + }, + endpoints: [ + { + port: 'metrics', + interval: '15s', + bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + }, + ], + }, + }, + }, +} diff --git a/jsonnet/kube-prometheus/rules/node-rules.libsonnet b/jsonnet/kube-prometheus/rules/node-rules.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..e3396b08ab4cea5bdb6cdafbe701c35829141147 --- /dev/null +++ b/jsonnet/kube-prometheus/rules/node-rules.libsonnet @@ -0,0 +1,39 @@ +{ + prometheusRules+:: { + groups+: [ + { + name: 'kube-prometheus-node-recording.rules', + rules: [ + { + expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)', + record: 'instance:node_cpu:rate:sum', + }, + { + expr: 'sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)', + record: 'instance:node_filesystem_usage:sum', + }, + { + expr: 'sum(rate(node_network_receive_bytes_total[3m])) BY (instance)', + record: 'instance:node_network_receive_bytes:rate:sum', + }, + { + expr: 'sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)', + record: 'instance:node_network_transmit_bytes:rate:sum', + }, + { + expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)', + record: 'instance:node_cpu:ratio', + }, + { + expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))', + record: 'cluster:node_cpu:sum_rate5m', + }, + { + expr: 'cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))', + record: 'cluster:node_cpu:ratio', + }, + ], + }, + ], + }, +} diff --git a/jsonnet/kube-prometheus/rules/rules.libsonnet b/jsonnet/kube-prometheus/rules/rules.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..b0217aba8fbe77852d9a17c41f09553159165b04 --- /dev/null +++ b/jsonnet/kube-prometheus/rules/rules.libsonnet @@ -0,0 +1 @@ +(import 'node-rules.libsonnet') diff --git a/jsonnetfile.json b/jsonnetfile.json new file mode 100644 index 0000000000000000000000000000000000000000..619586b23b2ed4030f66bb5101abfff465f8463f --- /dev/null +++ b/jsonnetfile.json @@ -0,0 +1,14 @@ +{ + "dependencies": [ + { + "name": "kube-prometheus", + "source": { + "git": { + "remote": "../../", + "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" + } + }, + "version": "." + } + ] +} diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json new file mode 100644 index 0000000000000000000000000000000000000000..ed5c26cc2ad6713e717b804a93c27c1745af03bc --- /dev/null +++ b/jsonnetfile.lock.json @@ -0,0 +1,84 @@ +{ + "dependencies": [ + { + "name": "kube-prometheus", + "source": { + "git": { + "remote": "../../", + "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" + } + }, + "version": "82817c8f9277c82ca164a6ef75bf476e56f24521" + }, + { + "name": "ksonnet", + "source": { + "git": { + "remote": "https://github.com/ksonnet/ksonnet-lib", + "subdir": "" + } + }, + "version": "d03da231d6c8bd74437b74a1e9e8b966f13dffa2" + }, + { + "name": "kubernetes-mixin", + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "subdir": "" + } + }, + "version": "3991660410dab201bb1f60b84d26e027c6c877e4" + }, + { + "name": "grafonnet", + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib", + "subdir": "grafonnet" + } + }, + "version": "d270f529db9eb750425a173188c534ab92532f47" + }, + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/kausalco/public", + "subdir": "grafana-builder" + } + }, + "version": "2c635c3310c6e61720871ac94d6d2572e37b83f7" + }, + { + "name": "grafana", + "source": { + "git": { + "remote": "https://github.com/brancz/kubernetes-grafana", + "subdir": "grafana" + } + }, + "version": "de2ec3f0f9115da2d47dc6b86af9b402e2bf146d" + }, + { + "name": "prometheus-operator", + "source": { + "git": { + "remote": "https://github.com/coreos/prometheus-operator", + "subdir": "jsonnet/prometheus-operator" + } + }, + "version": "7a25bf6b6bb2347dacb235659b73bc210117acc7" + }, + { + "name": "etcd-mixin", + "source": { + "git": { + "remote": "https://github.com/coreos/etcd", + "subdir": "Documentation/etcd-mixin" + } + }, + "version": "a621d807f061e1dd635033a8d6bc261461429e27" + } + ] +} diff --git a/kustomization.yaml b/kustomization.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc91b965e71d7c4eca1f4696ee67fdfa3e024693 --- /dev/null +++ b/kustomization.yaml @@ -0,0 +1,66 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ./manifests/00namespace-namespace.yaml +- ./manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml +- ./manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml +- ./manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml +- ./manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml +- ./manifests/0prometheus-operator-clusterRole.yaml +- ./manifests/0prometheus-operator-clusterRoleBinding.yaml +- ./manifests/0prometheus-operator-deployment.yaml +- ./manifests/0prometheus-operator-service.yaml +- ./manifests/0prometheus-operator-serviceAccount.yaml +- ./manifests/0prometheus-operator-serviceMonitor.yaml +- ./manifests/alertmanager-alertmanager.yaml +- ./manifests/alertmanager-secret.yaml +- ./manifests/alertmanager-service.yaml +- ./manifests/alertmanager-serviceAccount.yaml +- ./manifests/alertmanager-serviceMonitor.yaml +- ./manifests/grafana-dashboardDatasources.yaml +- ./manifests/grafana-dashboardDefinitions.yaml +- ./manifests/grafana-dashboardSources.yaml +- ./manifests/grafana-deployment.yaml +- ./manifests/grafana-service.yaml +- ./manifests/grafana-serviceAccount.yaml +- ./manifests/grafana-serviceMonitor.yaml +- ./manifests/kube-state-metrics-clusterRole.yaml +- ./manifests/kube-state-metrics-clusterRoleBinding.yaml +- ./manifests/kube-state-metrics-deployment.yaml +- ./manifests/kube-state-metrics-role.yaml +- ./manifests/kube-state-metrics-roleBinding.yaml +- ./manifests/kube-state-metrics-service.yaml +- ./manifests/kube-state-metrics-serviceAccount.yaml +- ./manifests/kube-state-metrics-serviceMonitor.yaml +- ./manifests/node-exporter-clusterRole.yaml +- ./manifests/node-exporter-clusterRoleBinding.yaml +- ./manifests/node-exporter-daemonset.yaml +- ./manifests/node-exporter-service.yaml +- ./manifests/node-exporter-serviceAccount.yaml +- ./manifests/node-exporter-serviceMonitor.yaml +- ./manifests/prometheus-adapter-apiService.yaml +- ./manifests/prometheus-adapter-clusterRole.yaml +- ./manifests/prometheus-adapter-clusterRoleBinding.yaml +- ./manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml +- ./manifests/prometheus-adapter-clusterRoleServerResources.yaml +- ./manifests/prometheus-adapter-configMap.yaml +- ./manifests/prometheus-adapter-deployment.yaml +- ./manifests/prometheus-adapter-roleBindingAuthReader.yaml +- ./manifests/prometheus-adapter-service.yaml +- ./manifests/prometheus-adapter-serviceAccount.yaml +- ./manifests/prometheus-clusterRole.yaml +- ./manifests/prometheus-clusterRoleBinding.yaml +- ./manifests/prometheus-prometheus.yaml +- ./manifests/prometheus-roleBindingConfig.yaml +- ./manifests/prometheus-roleBindingSpecificNamespaces.yaml +- ./manifests/prometheus-roleConfig.yaml +- ./manifests/prometheus-roleSpecificNamespaces.yaml +- ./manifests/prometheus-rules.yaml +- ./manifests/prometheus-service.yaml +- ./manifests/prometheus-serviceAccount.yaml +- ./manifests/prometheus-serviceMonitor.yaml +- ./manifests/prometheus-serviceMonitorApiserver.yaml +- ./manifests/prometheus-serviceMonitorCoreDNS.yaml +- ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml +- ./manifests/prometheus-serviceMonitorKubeScheduler.yaml +- ./manifests/prometheus-serviceMonitorKubelet.yaml diff --git a/manifests/00namespace-namespace.yaml b/manifests/00namespace-namespace.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d32523606f28187cc65fbb56387a78011a1e9425 --- /dev/null +++ b/manifests/00namespace-namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring diff --git a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..821e313d537e2b55287345343a2fd2edb61d4274 --- /dev/null +++ b/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml @@ -0,0 +1,2411 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: alertmanagers.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: Alertmanager + plural: alertmanagers + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: 'AlertmanagerSpec is a specification of the desired behavior + of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + additionalPeers: + description: AdditionalPeers allows injecting a set of additional Alertmanagers + to peer with to form a highly available cluster. + items: + type: string + type: array + affinity: + description: Affinity is a group of affinity scheduling rules. + properties: + nodeAffinity: + description: Node affinity is a group of node affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: An empty preferred scheduling term matches all + objects with implicit weight 0 (i.e. it's a no-op). A null + preferred scheduling term matches no objects (i.e. is also + a no-op). + properties: + preference: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + weight: + description: Weight associated with matching the corresponding + nodeSelectorTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - preference + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: A node selector represents the union of the results + of one or more label queries over a set of nodes; that is, + it represents the OR of the selectors represented by the node + selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. The + terms are ORed. + items: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + type: array + required: + - nodeSelectorTerms + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node has pods which matches the corresponding podAffinityTerm; + the node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key <topologyKey> matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the affinity requirements specified by this + field are not met at scheduling time, the pod will not be + scheduled onto the node. If the affinity requirements specified + by this field cease to be met at some point during pod execution + (e.g. due to a pod label update), the system may or may not + try to eventually evict the pod from its node. When there + are multiple elements, the lists of nodes corresponding to + each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key <topologyKey> + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: array + podAntiAffinity: + description: Pod anti affinity is a group of inter pod anti affinity + scheduling rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the anti-affinity expressions specified by this + field, but it may choose a node that violates one or more + of the expressions. The node that is most preferred is the + one with the greatest sum of weights, i.e. for each node that + meets all of the scheduling requirements (resource request, + requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field + and adding "weight" to the sum if the node has pods which + matches the corresponding podAffinityTerm; the node(s) with + the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key <topologyKey> matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the anti-affinity requirements specified by + this field are not met at scheduling time, the pod will not + be scheduled onto the node. If the anti-affinity requirements + specified by this field cease to be met at some point during + pod execution (e.g. due to a pod label update), the system + may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding + to each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key <topologyKey> + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: array + baseImage: + description: Base image that is used to deploy pods, without tag. + type: string + configMaps: + description: ConfigMaps is a list of ConfigMaps in the same namespace + as the Alertmanager object, which shall be mounted into the Alertmanager + Pods. The ConfigMaps are mounted into /etc/alertmanager/configmaps/<configmap-name>. + items: + type: string + type: array + containers: + description: Containers allows injecting additional containers. This + is meant to allow adding an authentication proxy to an Alertmanager + pod. + items: + description: A single application container that you want to run within + a pod. + properties: + args: + description: 'Arguments to the entrypoint. The docker image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will be + unchanged. The $(VAR_NAME) syntax can be escaped with a double + $$, ie: $$(VAR_NAME). Escaped references will never be expanded, + regardless of whether the variable exists or not. Cannot be + updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. The + docker image''s ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable exists + or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previous defined environment variables in the + container and any service environment variables. If a + variable cannot be resolved, the reference in the input + string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable + exists or not. Defaults to "".' + type: string + valueFrom: + description: EnvVarSource represents a source for the value + of an EnvVar. + properties: + configMapKeyRef: + description: Selects a key from a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap or it's + key must be defined + type: boolean + required: + - key + fieldRef: + description: ObjectFieldSelector selects an APIVersioned + field of an object. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + resourceFieldRef: + description: ResourceFieldSelector represents container + resources (cpu, memory) and their output format + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: {} + resource: + description: 'Required: resource to select' + type: string + required: + - resource + secretKeyRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's + key must be defined + type: boolean + required: + - key + required: + - name + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be a + C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key will + take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set of + ConfigMaps + properties: + configMapRef: + description: |- + ConfigMapEnvSource selects a ConfigMap to populate the environment variables with. + + The contents of the target ConfigMap's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap must be defined + type: boolean + prefix: + description: An optional identifier to prepend to each key + in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: |- + SecretEnvSource selects a Secret to populate the environment variables with. + + The contents of the target Secret's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: array + image: + description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' + type: string + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' + type: string + lifecycle: + description: Lifecycle describes actions that the management system + should take in response to container lifecycle events. For the + PostStart and PreStop lifecycle handlers, management of the + container blocks until the action is complete, unless the container + process fails, in which case the handler is aborted. + properties: + postStart: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + preStop: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + livenessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + name: + description: Name of the container specified as a DNS_LABEL. Each + container in a pod must have a unique name (DNS_LABEL). Cannot + be updated. + type: string + ports: + description: List of ports to expose from the container. Exposing + a port here gives the system additional information about the + network connections a container uses, but is primarily informational. + Not specifying a port here DOES NOT prevent that port from being + exposed. Any port which is listening on the default "0.0.0.0" + address inside a container will be accessible from the network. + Cannot be updated. + items: + description: ContainerPort represents a network port in a single + container. + properties: + containerPort: + description: Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. + type: string + hostPort: + description: Number of port to expose on the host. If specified, + this must be a valid port number, 0 < x < 65536. If HostNetwork + is specified, this must match ContainerPort. Most containers + do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod must + have a unique name. Name for the port that can be referred + to by services. + type: string + protocol: + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: array + readinessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + securityContext: + description: SecurityContext holds security configuration that + will be applied to a container. Some fields are present in both + SecurityContext and PodSecurityContext. When both are set, + the values in SecurityContext take precedence. + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a + process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN' + type: boolean + capabilities: + description: Adds and removes POSIX capabilities from running + containers. + properties: + add: + description: Added capabilities + items: + type: string + type: array + drop: + description: Removed capabilities + items: + type: string + type: array + privileged: + description: Run container in privileged mode. Processes in + privileged containers are essentially equivalent to root + on the host. Defaults to false. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use + for the containers. The default is DefaultProcMount which + uses the container runtime defaults for readonly paths and + masked paths. This requires the ProcMountType feature flag + to be enabled. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail + to start the container if it does. If unset or false, no + such validation will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata if + unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to + the container + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + stdin: + description: Whether this container should allocate a buffer for + stdin in the container runtime. If this is not set, reads from + stdin in the container will always result in EOF. Default is + false. + type: boolean + stdinOnce: + description: Whether the container runtime should close the stdin + channel after it has been opened by a single attach. When stdin + is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container + start, is empty until the first client attaches to stdin, and + then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container + is restarted. If this flag is false, a container processes that + reads from stdin will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the container''s + termination message will be written is mounted into the container''s + filesystem. Message written is intended to be brief final status, + such as an assertion failure message. Will be truncated by the + node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. Defaults to /dev/termination-log. + Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be populated. + File will use the contents of terminationMessagePath to populate + the container status message on both success and failure. FallbackToLogsOnError + will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever + is smaller. Defaults to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. This is a beta feature. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. + properties: + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - name + - devicePath + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume within + a container. + properties: + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. + type: string + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other way + around. When not set, MountPropagationNone is used. This + field is beta in 1.10. + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). + type: string + required: + - name + - mountPath + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might be + configured in the container image. Cannot be updated. + type: string + required: + - name + type: array + externalUrl: + description: The external URL the Alertmanager instances will be available + under. This is necessary to generate correct URLs. This is necessary + if Alertmanager is not served from root of a DNS name. + type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Alertmanager is being + configured. + type: string + imagePullSecrets: + description: An optional list of references to secrets in the same namespace + to use for pulling prometheus and alertmanager images from registries + see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod + items: + description: LocalObjectReference contains enough information to let + you locate the referenced object inside the same namespace. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + type: array + listenLocal: + description: ListenLocal makes the Alertmanager server listen on loopback, + so that it does not bind against the Pod IP. Note this is only for + the Alertmanager UI, not the gossip communication. + type: boolean + logLevel: + description: Log level for Alertmanager to be configured with. + type: string + nodeSelector: + description: Define which Nodes the Pods are scheduled on. + type: object + paused: + description: If set to true all actions on the underlaying managed objects + are not goint to be performed, except for delete actions. + type: boolean + podMetadata: + description: ObjectMeta is metadata that all persisted resources must + have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored + with a resource that may be set by external tools to store and + retrieve arbitrary metadata. They are not queryable and should + be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. + This is used to distinguish resources with same name and namespace + in different clusters. This field is not set anywhere right now + and apiserver is going to ignore it if set in create or update + request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set + when deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the + registry. Each entry is an identifier for the responsible component + that will remove the entry from the list. If the deletionTimestamp + of the object is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the + initializers struct will be set to nil and the object is considered + as initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible for + initializing this object. + type: string + required: + - name + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of + this representation of an object. Servers should convert + recognized schemas to the latest internal value, and may + reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, + 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object + defines what attributes will be set. Clients must ignore + fields that do not match the defined type of each attribute, + and should assume that any attribute may be empty, invalid, + or under defined. + properties: + causes: + description: The Causes array includes more details + associated with the StatusReason failure. Not all + StatusReasons may provide detailed causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases when + multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the + cause of the error. This field may be presented + as-is to a reader. + type: string + reason: + description: A machine-readable description of + the cause of the error. If this value is empty + there is no information available. + type: string + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may + differ from the requested resource Kind. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single + name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before + the operation should be retried. Some errors may indicate + the client must take an alternate action - for those + errors this field may indicate how long to wait before + taking the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a + single resource which can be described). More info: + http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + kind: + description: 'Kind is a string value representing the REST + resource this object represents. Servers may infer this + from the endpoint the client submits requests to. Cannot + be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status + of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various status + objects. A resource may have only one of {ObjectMeta, + ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that + the server has more data available. The value is opaque + and may be used to issue another request to the endpoint + that served this list to retrieve the next set of + available objects. Continuing a consistent list may + not be possible if the server configuration has changed + or more than a few minutes have passed. The resourceVersion + field returned when using this continue value will + be identical to the value in the first response, unless + you have received this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients + to determine when objects have changed. Value must + be treated as opaque by clients and passed unmodified + back to the server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + reason: + description: A machine-readable description of why this + operation is in the "Failure" status. If this value is + empty there is no information available. A Reason clarifies + an HTTP status code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + required: + - pending + labels: + description: 'Map of string keys and values that can be used to + organize and categorize (scope and select) objects. May match + selectors of replication controllers and services. More info: + http://kubernetes.io/docs/user-guide/labels' + type: object + name: + description: 'Name must be unique within a namespace. Is required + when creating resources, although some resources may allow a client + to request the generation of an appropriate name automatically. + Name is primarily intended for creation idempotence and configuration + definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this + list will point to this controller, with the controller field + set to true. There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let + you identify an owning object. An owning object must be in the + same namespace as the dependent, or be cluster-scoped, so there + is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. + To set this field, a user needs "delete" permission of the + owner, otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing + controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated + by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + priorityClassName: + description: Priority class assigned to the Pods + type: string + replicas: + description: Size is the expected size of the alertmanager cluster. + The controller will eventually make the size of the running cluster + equal to the expected size. + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute resources + allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute resources + required. If Requests is omitted for a container, it defaults + to Limits if that is explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + retention: + description: Time duration Alertmanager shall retain data for. Default + is '120h', and must match the regular expression `[0-9]+(ms|s|m|h)` + (milliseconds seconds minutes hours). + type: string + routePrefix: + description: The route prefix Alertmanager registers HTTP handlers for. + This is useful, if using ExternalURL and a proxy is rewriting HTTP + routes of a request, and the actual ExternalURL is still true, but + the server serves requests under a different route prefix. For example + for use with `kubectl proxy`. + type: string + secrets: + description: Secrets is a list of Secrets in the same namespace as the + Alertmanager object, which shall be mounted into the Alertmanager + Pods. The Secrets are mounted into /etc/alertmanager/secrets/<secret-name>. + items: + type: string + type: array + securityContext: + description: PodSecurityContext holds pod-level security attributes + and common container settings. Some fields are also present in container.securityContext. Field + values of container.securityContext take precedence over field values + of PodSecurityContext. + properties: + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: + + 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + format: int64 + type: integer + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in SecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence for that container. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in SecurityContext. If set + in both SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. May + also be set in SecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to the + container + properties: + level: + description: Level is SELinux level label that applies to the + container. + type: string + role: + description: Role is a SELinux role label that applies to the + container. + type: string + type: + description: Type is a SELinux type label that applies to the + container. + type: string + user: + description: User is a SELinux user label that applies to the + container. + type: string + supplementalGroups: + description: A list of groups applied to the first process run in + each container, in addition to the container's primary GID. If + unspecified, no groups will be added to any container. + items: + format: int64 + type: integer + type: array + sysctls: + description: Sysctls hold a list of namespaced sysctls used for + the pod. Pods with unsupported sysctls (by the container runtime) + might fail to launch. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: array + serviceAccountName: + description: ServiceAccountName is the name of the ServiceAccount to + use to run the Prometheus Pods. + type: string + sha: + description: SHA of Alertmanager container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + storage: + description: StorageSpec defines the configured storage for a group + Prometheus servers. If neither `emptyDir` nor `volumeClaimTemplate` + is specified, then by default an [EmptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) + will be used. + properties: + emptyDir: + description: Represents an empty directory for a pod. Empty directory + volumes support ownership management and SELinux relabeling. + properties: + medium: + description: 'What type of storage medium should back this directory. + The default is "" which means to use the node''s default medium. + Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + type: string + sizeLimit: {} + volumeClaimTemplate: + description: PersistentVolumeClaim is a user's request for and claim + to a persistent volume + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources + must have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map + stored with a resource that may be set by external tools + to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs + to. This is used to distinguish resources with same name + and namespace in different clusters. This field is not + set anywhere right now and apiserver is going to ignore + it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to + gracefully terminate before it will be removed from the + system. Only set when deletionTimestamp is also set. May + only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted + from the registry. Each entry is an identifier for the + responsible component that will remove the entry from + the list. If the deletionTimestamp of the object is non-nil, + entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that + must execute in order before this object is visible. + When the last pending initializer is removed, and + no failing result is set, the initializers struct + will be set to nil and the object is considered as + initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible + for initializing this object. + type: string + required: + - name + type: array + result: + description: Status is a return value for calls that + don't return other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema + of this representation of an object. Servers should + convert recognized schemas to the latest internal + value, and may reject unrecognized values. More + info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this + status, 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional + properties that MAY be set by the server to provide + additional information about a response. The Reason + field of a Status object defines what attributes + will be set. Clients must ignore fields that do + not match the defined type of each attribute, + and should assume that any attribute may be empty, + invalid, or under defined. + properties: + causes: + description: The Causes array includes more + details associated with the StatusReason failure. + Not all StatusReasons may provide detailed + causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases + when multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description + of the cause of the error. This field + may be presented as-is to a reader. + type: string + reason: + description: A machine-readable description + of the cause of the error. If this value + is empty there is no information available. + type: string + type: array + group: + description: The group attribute of the resource + associated with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource + associated with the status StatusReason. On + some operations may differ from the requested + resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource + associated with the status StatusReason (when + there is a single name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds + before the operation should be retried. Some + errors may indicate the client must take an + alternate action - for those errors this field + may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there + is a single resource which can be described). + More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + kind: + description: 'Kind is a string value representing + the REST resource this object represents. Servers + may infer this from the endpoint the client submits + requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the + status of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various + status objects. A resource may have only one of + {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user + set a limit on the number of items returned, + and indicates that the server has more data + available. The value is opaque and may be + used to issue another request to the endpoint + that served this list to retrieve the next + set of available objects. Continuing a consistent + list may not be possible if the server configuration + has changed or more than a few minutes have + passed. The resourceVersion field returned + when using this continue value will be identical + to the value in the first response, unless + you have received this token from an error + message. + type: string + resourceVersion: + description: 'String that identifies the server''s + internal version of this object that can be + used by clients to determine when objects + have changed. Value must be treated as opaque + by clients and passed unmodified back to the + server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing + this object. Populated by the system. Read-only. + type: string + reason: + description: A machine-readable description of why + this operation is in the "Failure" status. If + this value is empty there is no information available. + A Reason clarifies an HTTP status code but does + not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + required: + - pending + labels: + description: 'Map of string keys and values that can be + used to organize and categorize (scope and select) objects. + May match selectors of replication controllers and services. + More info: http://kubernetes.io/docs/user-guide/labels' + type: object + name: + description: 'Name must be unique within a namespace. Is + required when creating resources, although some resources + may allow a client to request the generation of an appropriate + name automatically. Name is primarily intended for creation + idempotence and configuration definition. Cannot be updated. + More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If + ALL objects in the list have been deleted, this object + will be garbage collected. If this object is managed by + a controller, then an entry in this list will point to + this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information + to let you identify an owning object. An owning object + must be in the same namespace as the dependent, or be + cluster-scoped, so there is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from + the key-value store until this reference is removed. + Defaults to false. To set this field, a user needs + "delete" permission of the owner, otherwise 422 + (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the + managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + spec: + description: PersistentVolumeClaimSpec describes the common + attributes of storage devices and allows a Source for provider-specific + attributes + properties: + accessModes: + description: 'AccessModes contains the desired access modes + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: TypedLocalObjectReference contains enough information + to let you locate the typed referenced object inside the + same namespace. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, the + specified Kind must be in the core API group. For + any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + resources: + description: ResourceRequirements describes the compute + resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of + compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount + of compute resources required. If Requests is omitted + for a container, it defaults to Limits if that is + explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + selector: + description: A label selector is a label query over a set + of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + storageClassName: + description: 'Name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required + by the claim. Value of Filesystem is implied when not + included in claim spec. This is a beta feature. + type: string + volumeName: + description: VolumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + status: + description: PersistentVolumeClaimStatus is the current status + of a persistent volume claim. + properties: + accessModes: + description: 'AccessModes contains the actual access modes + the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + capacity: + description: Represents the actual resources of the underlying + volume. + type: object + conditions: + description: Current Condition of persistent volume claim. + If underlying persistent volume is being resized then + the Condition will be set to 'ResizeStarted'. + items: + description: PersistentVolumeClaimCondition contails details + about state of pvc + properties: + lastProbeTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + lastTransitionTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + message: + description: Human-readable message indicating details + about last transition. + type: string + reason: + description: Unique, this should be a short, machine + understandable string that gives the reason for + condition's last transition. If it reports "ResizeStarted" + that means the underlying persistent volume is being + resized. + type: string + status: + type: string + type: + type: string + required: + - type + - status + type: array + phase: + description: Phase represents the current phase of PersistentVolumeClaim. + type: string + tag: + description: Tag of Alertmanager container image to be deployed. Defaults + to the value of `version`. Version is ignored if Tag is set. + type: string + tolerations: + description: If specified, the pod's tolerations. + items: + description: The pod this Toleration is attached to tolerates any + taint that matches the triple <key,value,effect> using the matching + operator <operator>. + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. When specified, allowed values + are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, operator + must be Exists; this combination means to match all values and + all keys. + type: string + operator: + description: Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. Exists + is equivalent to wildcard for value, so that a pod can tolerate + all taints of a particular category. + type: string + tolerationSeconds: + description: TolerationSeconds represents the period of time the + toleration (which must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By default, it is not + set, which means tolerate the taint forever (do not evict). + Zero and negative values will be treated as 0 (evict immediately) + by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise + just a regular string. + type: string + type: array + version: + description: Version the cluster should be on. + type: string + status: + description: 'AlertmanagerStatus is the most recent observed status of the + Alertmanager cluster. Read-only. Not included when requesting from the + apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + availableReplicas: + description: Total number of available pods (ready for at least minReadySeconds) + targeted by this Alertmanager cluster. + format: int32 + type: integer + paused: + description: Represents whether any actions on the underlaying managed + objects are being performed. Only delete actions will be performed. + type: boolean + replicas: + description: Total number of non-terminated pods targeted by this Alertmanager + cluster (their labels match the selector). + format: int32 + type: integer + unavailableReplicas: + description: Total number of unavailable pods targeted by this Alertmanager + cluster. + format: int32 + type: integer + updatedReplicas: + description: Total number of non-terminated pods targeted by this Alertmanager + cluster that have the desired version spec. + format: int32 + type: integer + required: + - paused + - replicas + - updatedReplicas + - availableReplicas + - unavailableReplicas + version: v1 diff --git a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a69cf0a92d072c9de36f649541b07a281b892855 --- /dev/null +++ b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -0,0 +1,3199 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: prometheuses.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: Prometheus + plural: prometheuses + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: 'PrometheusSpec is a specification of the desired behavior + of the Prometheus cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + additionalAlertManagerConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + additionalAlertRelabelConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + additionalScrapeConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + affinity: + description: Affinity is a group of affinity scheduling rules. + properties: + nodeAffinity: + description: Node affinity is a group of node affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: An empty preferred scheduling term matches all + objects with implicit weight 0 (i.e. it's a no-op). A null + preferred scheduling term matches no objects (i.e. is also + a no-op). + properties: + preference: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + weight: + description: Weight associated with matching the corresponding + nodeSelectorTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - preference + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: A node selector represents the union of the results + of one or more label queries over a set of nodes; that is, + it represents the OR of the selectors represented by the node + selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. The + terms are ORed. + items: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + type: array + required: + - nodeSelectorTerms + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node has pods which matches the corresponding podAffinityTerm; + the node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key <topologyKey> matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the affinity requirements specified by this + field are not met at scheduling time, the pod will not be + scheduled onto the node. If the affinity requirements specified + by this field cease to be met at some point during pod execution + (e.g. due to a pod label update), the system may or may not + try to eventually evict the pod from its node. When there + are multiple elements, the lists of nodes corresponding to + each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key <topologyKey> + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: array + podAntiAffinity: + description: Pod anti affinity is a group of inter pod anti affinity + scheduling rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the anti-affinity expressions specified by this + field, but it may choose a node that violates one or more + of the expressions. The node that is most preferred is the + one with the greatest sum of weights, i.e. for each node that + meets all of the scheduling requirements (resource request, + requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field + and adding "weight" to the sum if the node has pods which + matches the corresponding podAffinityTerm; the node(s) with + the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key <topologyKey> matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the anti-affinity requirements specified by + this field are not met at scheduling time, the pod will not + be scheduled onto the node. If the anti-affinity requirements + specified by this field cease to be met at some point during + pod execution (e.g. due to a pod label update), the system + may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding + to each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key <topologyKey> + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: array + alerting: + description: AlertingSpec defines parameters for alerting configuration + of Prometheus servers. + properties: + alertmanagers: + description: AlertmanagerEndpoints Prometheus should fire alerts + against. + items: + description: AlertmanagerEndpoints defines a selection of a single + Endpoints object containing alertmanager IPs to fire alerts + against. + properties: + bearerTokenFile: + description: BearerTokenFile to read from filesystem to use + when authenticating to Alertmanager. + type: string + name: + description: Name of Endpoints object in Namespace. + type: string + namespace: + description: Namespace of Endpoints object. + type: string + pathPrefix: + description: Prefix for the HTTP path alerts are pushed to. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use when firing alerts. + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + required: + - namespace + - name + - port + type: array + required: + - alertmanagers + apiserverConfig: + description: 'APIServerConfig defines a host and auth methods to access + apiserver. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config' + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over basic + authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + bearerToken: + description: Bearer token for accessing apiserver. + type: string + bearerTokenFile: + description: File to read bearer token for accessing apiserver. + type: string + host: + description: Host of apiserver. A valid string consisting of a hostname + or IP followed by an optional port number + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + required: + - host + baseImage: + description: Base image to use for a Prometheus deployment. + type: string + configMaps: + description: ConfigMaps is a list of ConfigMaps in the same namespace + as the Prometheus object, which shall be mounted into the Prometheus + Pods. The ConfigMaps are mounted into /etc/prometheus/configmaps/<configmap-name>. + items: + type: string + type: array + containers: + description: Containers allows injecting additional containers. This + is meant to allow adding an authentication proxy to a Prometheus pod. + items: + description: A single application container that you want to run within + a pod. + properties: + args: + description: 'Arguments to the entrypoint. The docker image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will be + unchanged. The $(VAR_NAME) syntax can be escaped with a double + $$, ie: $$(VAR_NAME). Escaped references will never be expanded, + regardless of whether the variable exists or not. Cannot be + updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. The + docker image''s ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable exists + or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previous defined environment variables in the + container and any service environment variables. If a + variable cannot be resolved, the reference in the input + string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable + exists or not. Defaults to "".' + type: string + valueFrom: + description: EnvVarSource represents a source for the value + of an EnvVar. + properties: + configMapKeyRef: + description: Selects a key from a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap or it's + key must be defined + type: boolean + required: + - key + fieldRef: + description: ObjectFieldSelector selects an APIVersioned + field of an object. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + resourceFieldRef: + description: ResourceFieldSelector represents container + resources (cpu, memory) and their output format + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: {} + resource: + description: 'Required: resource to select' + type: string + required: + - resource + secretKeyRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's + key must be defined + type: boolean + required: + - key + required: + - name + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be a + C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key will + take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set of + ConfigMaps + properties: + configMapRef: + description: |- + ConfigMapEnvSource selects a ConfigMap to populate the environment variables with. + + The contents of the target ConfigMap's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap must be defined + type: boolean + prefix: + description: An optional identifier to prepend to each key + in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: |- + SecretEnvSource selects a Secret to populate the environment variables with. + + The contents of the target Secret's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: array + image: + description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' + type: string + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' + type: string + lifecycle: + description: Lifecycle describes actions that the management system + should take in response to container lifecycle events. For the + PostStart and PreStop lifecycle handlers, management of the + container blocks until the action is complete, unless the container + process fails, in which case the handler is aborted. + properties: + postStart: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + preStop: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + livenessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + name: + description: Name of the container specified as a DNS_LABEL. Each + container in a pod must have a unique name (DNS_LABEL). Cannot + be updated. + type: string + ports: + description: List of ports to expose from the container. Exposing + a port here gives the system additional information about the + network connections a container uses, but is primarily informational. + Not specifying a port here DOES NOT prevent that port from being + exposed. Any port which is listening on the default "0.0.0.0" + address inside a container will be accessible from the network. + Cannot be updated. + items: + description: ContainerPort represents a network port in a single + container. + properties: + containerPort: + description: Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. + type: string + hostPort: + description: Number of port to expose on the host. If specified, + this must be a valid port number, 0 < x < 65536. If HostNetwork + is specified, this must match ContainerPort. Most containers + do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod must + have a unique name. Name for the port that can be referred + to by services. + type: string + protocol: + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: array + readinessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + securityContext: + description: SecurityContext holds security configuration that + will be applied to a container. Some fields are present in both + SecurityContext and PodSecurityContext. When both are set, + the values in SecurityContext take precedence. + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a + process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN' + type: boolean + capabilities: + description: Adds and removes POSIX capabilities from running + containers. + properties: + add: + description: Added capabilities + items: + type: string + type: array + drop: + description: Removed capabilities + items: + type: string + type: array + privileged: + description: Run container in privileged mode. Processes in + privileged containers are essentially equivalent to root + on the host. Defaults to false. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use + for the containers. The default is DefaultProcMount which + uses the container runtime defaults for readonly paths and + masked paths. This requires the ProcMountType feature flag + to be enabled. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail + to start the container if it does. If unset or false, no + such validation will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata if + unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to + the container + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + stdin: + description: Whether this container should allocate a buffer for + stdin in the container runtime. If this is not set, reads from + stdin in the container will always result in EOF. Default is + false. + type: boolean + stdinOnce: + description: Whether the container runtime should close the stdin + channel after it has been opened by a single attach. When stdin + is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container + start, is empty until the first client attaches to stdin, and + then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container + is restarted. If this flag is false, a container processes that + reads from stdin will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the container''s + termination message will be written is mounted into the container''s + filesystem. Message written is intended to be brief final status, + such as an assertion failure message. Will be truncated by the + node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. Defaults to /dev/termination-log. + Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be populated. + File will use the contents of terminationMessagePath to populate + the container status message on both success and failure. FallbackToLogsOnError + will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever + is smaller. Defaults to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. This is a beta feature. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. + properties: + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - name + - devicePath + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume within + a container. + properties: + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. + type: string + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other way + around. When not set, MountPropagationNone is used. This + field is beta in 1.10. + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). + type: string + required: + - name + - mountPath + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might be + configured in the container image. Cannot be updated. + type: string + required: + - name + type: array + enableAdminAPI: + description: 'Enable access to prometheus web admin API. Defaults to + the value of `false`. WARNING: Enabling the admin APIs enables mutating + endpoints, to delete data, shutdown Prometheus, and more. Enabling + this should be done with care and the user is advised to add additional + authentication authorization via a proxy to ensure only clients authorized + to perform these actions can do so. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis' + type: boolean + evaluationInterval: + description: Interval between consecutive evaluations. + type: string + externalLabels: + description: The labels to add to any time series or alerts when communicating + with external systems (federation, remote storage, Alertmanager). + type: object + externalUrl: + description: The external URL the Prometheus instances will be available + under. This is necessary to generate correct URLs. This is necessary + if Prometheus is not served from root of a DNS name. + type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Prometheus is being + configured. + type: string + imagePullSecrets: + description: An optional list of references to secrets in the same namespace + to use for pulling prometheus and alertmanager images from registries + see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod + items: + description: LocalObjectReference contains enough information to let + you locate the referenced object inside the same namespace. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + type: array + listenLocal: + description: ListenLocal makes the Prometheus server listen on loopback, + so that it does not bind against the Pod IP. + type: boolean + logFormat: + description: Log format for Prometheus to be configured with. + type: string + logLevel: + description: Log level for Prometheus to be configured with. + type: string + nodeSelector: + description: Define which Nodes the Pods are scheduled on. + type: object + paused: + description: When a Prometheus deployment is paused, no actions except + for deletion will be performed on the underlying objects. + type: boolean + podMetadata: + description: ObjectMeta is metadata that all persisted resources must + have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored + with a resource that may be set by external tools to store and + retrieve arbitrary metadata. They are not queryable and should + be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. + This is used to distinguish resources with same name and namespace + in different clusters. This field is not set anywhere right now + and apiserver is going to ignore it if set in create or update + request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set + when deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the + registry. Each entry is an identifier for the responsible component + that will remove the entry from the list. If the deletionTimestamp + of the object is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the + initializers struct will be set to nil and the object is considered + as initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible for + initializing this object. + type: string + required: + - name + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of + this representation of an object. Servers should convert + recognized schemas to the latest internal value, and may + reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, + 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object + defines what attributes will be set. Clients must ignore + fields that do not match the defined type of each attribute, + and should assume that any attribute may be empty, invalid, + or under defined. + properties: + causes: + description: The Causes array includes more details + associated with the StatusReason failure. Not all + StatusReasons may provide detailed causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases when + multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the + cause of the error. This field may be presented + as-is to a reader. + type: string + reason: + description: A machine-readable description of + the cause of the error. If this value is empty + there is no information available. + type: string + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may + differ from the requested resource Kind. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single + name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before + the operation should be retried. Some errors may indicate + the client must take an alternate action - for those + errors this field may indicate how long to wait before + taking the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a + single resource which can be described). More info: + http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + kind: + description: 'Kind is a string value representing the REST + resource this object represents. Servers may infer this + from the endpoint the client submits requests to. Cannot + be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status + of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various status + objects. A resource may have only one of {ObjectMeta, + ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that + the server has more data available. The value is opaque + and may be used to issue another request to the endpoint + that served this list to retrieve the next set of + available objects. Continuing a consistent list may + not be possible if the server configuration has changed + or more than a few minutes have passed. The resourceVersion + field returned when using this continue value will + be identical to the value in the first response, unless + you have received this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients + to determine when objects have changed. Value must + be treated as opaque by clients and passed unmodified + back to the server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + reason: + description: A machine-readable description of why this + operation is in the "Failure" status. If this value is + empty there is no information available. A Reason clarifies + an HTTP status code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + required: + - pending + labels: + description: 'Map of string keys and values that can be used to + organize and categorize (scope and select) objects. May match + selectors of replication controllers and services. More info: + http://kubernetes.io/docs/user-guide/labels' + type: object + name: + description: 'Name must be unique within a namespace. Is required + when creating resources, although some resources may allow a client + to request the generation of an appropriate name automatically. + Name is primarily intended for creation idempotence and configuration + definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this + list will point to this controller, with the controller field + set to true. There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let + you identify an owning object. An owning object must be in the + same namespace as the dependent, or be cluster-scoped, so there + is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. + To set this field, a user needs "delete" permission of the + owner, otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing + controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated + by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + priorityClassName: + description: Priority class assigned to the Pods + type: string + query: + description: QuerySpec defines the query command line flags when starting + Prometheus. + properties: + lookbackDelta: + description: The delta difference allowed for retrieving metrics + during expression evaluations. + type: string + maxConcurrency: + description: Number of concurrent queries that can be run at once. + format: int32 + type: integer + timeout: + description: Maximum time a query may take before being aborted. + type: string + remoteRead: + description: If specified, the remote_read spec. This is an experimental + feature, it may change in any upcoming release in a breaking way. + items: + description: RemoteReadSpec defines the remote_read configuration + for prometheus. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + bearerToken: + description: bearer token for remote read. + type: string + bearerTokenFile: + description: File to read bearer token for remote read. + type: string + proxyUrl: + description: Optional ProxyURL + type: string + readRecent: + description: Whether reads should be made for queries for time + ranges that the local storage should have complete data for. + type: boolean + remoteTimeout: + description: Timeout for requests to the remote read endpoint. + type: string + requiredMatchers: + description: An optional list of equality matchers which have + to be present in a selector to query the remote read endpoint. + type: object + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + url: + description: The URL of the endpoint to send samples to. + type: string + required: + - url + type: array + remoteWrite: + description: If specified, the remote_write spec. This is an experimental + feature, it may change in any upcoming release in a breaking way. + items: + description: RemoteWriteSpec defines the remote_write configuration + for prometheus. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + bearerToken: + description: File to read bearer token for remote write. + type: string + bearerTokenFile: + description: File to read bearer token for remote write. + type: string + proxyUrl: + description: Optional ProxyURL + type: string + queueConfig: + description: QueueConfig allows the tuning of remote_write queue_config + parameters. This object is referenced in the RemoteWriteSpec + object. + properties: + batchSendDeadline: + description: BatchSendDeadline is the maximum time a sample + will wait in buffer. + type: string + capacity: + description: Capacity is the number of samples to buffer per + shard before we start dropping them. + format: int32 + type: integer + maxBackoff: + description: MaxBackoff is the maximum retry delay. + type: string + maxRetries: + description: MaxRetries is the maximum number of times to + retry a batch on recoverable errors. + format: int32 + type: integer + maxSamplesPerSend: + description: MaxSamplesPerSend is the maximum number of samples + per send. + format: int32 + type: integer + maxShards: + description: MaxShards is the maximum number of shards, i.e. + amount of concurrency. + format: int32 + type: integer + minBackoff: + description: MinBackoff is the initial retry delay. Gets doubled + for every retry. + type: string + minShards: + description: MinShards is the minimum number of shards, i.e. + amount of concurrency. + format: int32 + type: integer + remoteTimeout: + description: Timeout for requests to the remote write endpoint. + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + url: + description: The URL of the endpoint to send samples to. + type: string + writeRelabelConfigs: + description: The list of remote write relabel configurations. + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + `<metric_relabel_configs>`-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: array + required: + - url + type: array + replicaExternalLabelName: + description: Name of Prometheus external label used to denote replica + name. Defaults to the value of `prometheus_replica`. + type: string + replicas: + description: Number of instances to deploy for a Prometheus deployment. + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute resources + allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute resources + required. If Requests is omitted for a container, it defaults + to Limits if that is explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + retention: + description: Time duration Prometheus shall retain data for. Default + is '24h', and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` + (milliseconds seconds minutes hours days weeks years). + type: string + routePrefix: + description: The route prefix Prometheus registers HTTP handlers for. + This is useful, if using ExternalURL and a proxy is rewriting HTTP + routes of a request, and the actual ExternalURL is still true, but + the server serves requests under a different route prefix. For example + for use with `kubectl proxy`. + type: string + ruleNamespaceSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + ruleSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + rules: + description: /--rules.*/ command-line arguments + properties: + alert: + description: /--rules.alert.*/ command-line arguments + properties: + forGracePeriod: + description: Minimum duration between alert and restored 'for' + state. This is maintained only for alerts with configured + 'for' time greater than grace period. + type: string + forOutageTolerance: + description: Max time to tolerate prometheus outage for restoring + 'for' state of alert. + type: string + resendDelay: + description: Minimum amount of time to wait before resending + an alert to Alertmanager. + type: string + scrapeInterval: + description: Interval between consecutive scrapes. + type: string + secrets: + description: Secrets is a list of Secrets in the same namespace as the + Prometheus object, which shall be mounted into the Prometheus Pods. + The Secrets are mounted into /etc/prometheus/secrets/<secret-name>. + items: + type: string + type: array + securityContext: + description: PodSecurityContext holds pod-level security attributes + and common container settings. Some fields are also present in container.securityContext. Field + values of container.securityContext take precedence over field values + of PodSecurityContext. + properties: + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: + + 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + format: int64 + type: integer + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in SecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence for that container. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in SecurityContext. If set + in both SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. May + also be set in SecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to the + container + properties: + level: + description: Level is SELinux level label that applies to the + container. + type: string + role: + description: Role is a SELinux role label that applies to the + container. + type: string + type: + description: Type is a SELinux type label that applies to the + container. + type: string + user: + description: User is a SELinux user label that applies to the + container. + type: string + supplementalGroups: + description: A list of groups applied to the first process run in + each container, in addition to the container's primary GID. If + unspecified, no groups will be added to any container. + items: + format: int64 + type: integer + type: array + sysctls: + description: Sysctls hold a list of namespaced sysctls used for + the pod. Pods with unsupported sysctls (by the container runtime) + might fail to launch. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: array + serviceAccountName: + description: ServiceAccountName is the name of the ServiceAccount to + use to run the Prometheus Pods. + type: string + serviceMonitorNamespaceSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + serviceMonitorSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + sha: + description: SHA of Prometheus container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + storage: + description: StorageSpec defines the configured storage for a group + Prometheus servers. If neither `emptyDir` nor `volumeClaimTemplate` + is specified, then by default an [EmptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) + will be used. + properties: + emptyDir: + description: Represents an empty directory for a pod. Empty directory + volumes support ownership management and SELinux relabeling. + properties: + medium: + description: 'What type of storage medium should back this directory. + The default is "" which means to use the node''s default medium. + Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + type: string + sizeLimit: {} + volumeClaimTemplate: + description: PersistentVolumeClaim is a user's request for and claim + to a persistent volume + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources + must have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map + stored with a resource that may be set by external tools + to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs + to. This is used to distinguish resources with same name + and namespace in different clusters. This field is not + set anywhere right now and apiserver is going to ignore + it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to + gracefully terminate before it will be removed from the + system. Only set when deletionTimestamp is also set. May + only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted + from the registry. Each entry is an identifier for the + responsible component that will remove the entry from + the list. If the deletionTimestamp of the object is non-nil, + entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that + must execute in order before this object is visible. + When the last pending initializer is removed, and + no failing result is set, the initializers struct + will be set to nil and the object is considered as + initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible + for initializing this object. + type: string + required: + - name + type: array + result: + description: Status is a return value for calls that + don't return other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema + of this representation of an object. Servers should + convert recognized schemas to the latest internal + value, and may reject unrecognized values. More + info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this + status, 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional + properties that MAY be set by the server to provide + additional information about a response. The Reason + field of a Status object defines what attributes + will be set. Clients must ignore fields that do + not match the defined type of each attribute, + and should assume that any attribute may be empty, + invalid, or under defined. + properties: + causes: + description: The Causes array includes more + details associated with the StatusReason failure. + Not all StatusReasons may provide detailed + causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases + when multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description + of the cause of the error. This field + may be presented as-is to a reader. + type: string + reason: + description: A machine-readable description + of the cause of the error. If this value + is empty there is no information available. + type: string + type: array + group: + description: The group attribute of the resource + associated with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource + associated with the status StatusReason. On + some operations may differ from the requested + resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource + associated with the status StatusReason (when + there is a single name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds + before the operation should be retried. Some + errors may indicate the client must take an + alternate action - for those errors this field + may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there + is a single resource which can be described). + More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + kind: + description: 'Kind is a string value representing + the REST resource this object represents. Servers + may infer this from the endpoint the client submits + requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the + status of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various + status objects. A resource may have only one of + {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user + set a limit on the number of items returned, + and indicates that the server has more data + available. The value is opaque and may be + used to issue another request to the endpoint + that served this list to retrieve the next + set of available objects. Continuing a consistent + list may not be possible if the server configuration + has changed or more than a few minutes have + passed. The resourceVersion field returned + when using this continue value will be identical + to the value in the first response, unless + you have received this token from an error + message. + type: string + resourceVersion: + description: 'String that identifies the server''s + internal version of this object that can be + used by clients to determine when objects + have changed. Value must be treated as opaque + by clients and passed unmodified back to the + server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing + this object. Populated by the system. Read-only. + type: string + reason: + description: A machine-readable description of why + this operation is in the "Failure" status. If + this value is empty there is no information available. + A Reason clarifies an HTTP status code but does + not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + required: + - pending + labels: + description: 'Map of string keys and values that can be + used to organize and categorize (scope and select) objects. + May match selectors of replication controllers and services. + More info: http://kubernetes.io/docs/user-guide/labels' + type: object + name: + description: 'Name must be unique within a namespace. Is + required when creating resources, although some resources + may allow a client to request the generation of an appropriate + name automatically. Name is primarily intended for creation + idempotence and configuration definition. Cannot be updated. + More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If + ALL objects in the list have been deleted, this object + will be garbage collected. If this object is managed by + a controller, then an entry in this list will point to + this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information + to let you identify an owning object. An owning object + must be in the same namespace as the dependent, or be + cluster-scoped, so there is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from + the key-value store until this reference is removed. + Defaults to false. To set this field, a user needs + "delete" permission of the owner, otherwise 422 + (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the + managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + spec: + description: PersistentVolumeClaimSpec describes the common + attributes of storage devices and allows a Source for provider-specific + attributes + properties: + accessModes: + description: 'AccessModes contains the desired access modes + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: TypedLocalObjectReference contains enough information + to let you locate the typed referenced object inside the + same namespace. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, the + specified Kind must be in the core API group. For + any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + resources: + description: ResourceRequirements describes the compute + resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of + compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount + of compute resources required. If Requests is omitted + for a container, it defaults to Limits if that is + explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + selector: + description: A label selector is a label query over a set + of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + storageClassName: + description: 'Name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required + by the claim. Value of Filesystem is implied when not + included in claim spec. This is a beta feature. + type: string + volumeName: + description: VolumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + status: + description: PersistentVolumeClaimStatus is the current status + of a persistent volume claim. + properties: + accessModes: + description: 'AccessModes contains the actual access modes + the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + capacity: + description: Represents the actual resources of the underlying + volume. + type: object + conditions: + description: Current Condition of persistent volume claim. + If underlying persistent volume is being resized then + the Condition will be set to 'ResizeStarted'. + items: + description: PersistentVolumeClaimCondition contails details + about state of pvc + properties: + lastProbeTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + lastTransitionTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + message: + description: Human-readable message indicating details + about last transition. + type: string + reason: + description: Unique, this should be a short, machine + understandable string that gives the reason for + condition's last transition. If it reports "ResizeStarted" + that means the underlying persistent volume is being + resized. + type: string + status: + type: string + type: + type: string + required: + - type + - status + type: array + phase: + description: Phase represents the current phase of PersistentVolumeClaim. + type: string + tag: + description: Tag of Prometheus container image to be deployed. Defaults + to the value of `version`. Version is ignored if Tag is set. + type: string + thanos: + description: ThanosSpec defines parameters for a Prometheus server within + a Thanos deployment. + properties: + baseImage: + description: Thanos base image if other than default. + type: string + clusterAdvertiseAddress: + description: Explicit (external) ip:port address to advertise for + gossip in gossip cluster. Used internally for membership only. + type: string + gcs: + description: 'Deprecated: ThanosGCSSpec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosGCSSpec + will be removed.' + properties: + bucket: + description: Google Cloud Storage bucket name for stored blocks. + If empty it won't store any block inside Google Cloud Storage. + type: string + credentials: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + grpcAdvertiseAddress: + description: Explicit (external) host:port address to advertise + for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' + will be used. + type: string + image: + description: Image if specified has precedence over baseImage, tag + and sha combinations. Specifying the version is still necessary + to ensure the Prometheus Operator knows what version of Thanos + is being configured. + type: string + objectStorageConfig: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be + defined + type: boolean + required: + - key + peers: + description: Peers is a DNS name for Thanos to discover peers through. + type: string + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + s3: + description: 'Deprecated: ThanosS3Spec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosS3Spec + will be removed.' + properties: + accessKey: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + bucket: + description: S3-Compatible API bucket name for stored blocks. + type: string + encryptsse: + description: Whether to use Server Side Encryption + type: boolean + endpoint: + description: S3-Compatible API endpoint for stored blocks. + type: string + insecure: + description: Whether to use an insecure connection with an S3-Compatible + API. + type: boolean + secretKey: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + signatureVersion2: + description: Whether to use S3 Signature Version 2; otherwise + Signature Version 4 will be used. + type: boolean + sha: + description: SHA of Thanos container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + tag: + description: Tag of Thanos sidecar container image to be deployed. + Defaults to the value of `version`. Version is ignored if Tag + is set. + type: string + version: + description: Version describes the version of Thanos to use. + type: string + tolerations: + description: If specified, the pod's tolerations. + items: + description: The pod this Toleration is attached to tolerates any + taint that matches the triple <key,value,effect> using the matching + operator <operator>. + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. When specified, allowed values + are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, operator + must be Exists; this combination means to match all values and + all keys. + type: string + operator: + description: Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. Exists + is equivalent to wildcard for value, so that a pod can tolerate + all taints of a particular category. + type: string + tolerationSeconds: + description: TolerationSeconds represents the period of time the + toleration (which must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By default, it is not + set, which means tolerate the taint forever (do not evict). + Zero and negative values will be treated as 0 (evict immediately) + by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise + just a regular string. + type: string + type: array + version: + description: Version of Prometheus to be deployed. + type: string + status: + description: 'PrometheusStatus is the most recent observed status of the + Prometheus cluster. Read-only. Not included when requesting from the apiserver, + only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + availableReplicas: + description: Total number of available pods (ready for at least minReadySeconds) + targeted by this Prometheus deployment. + format: int32 + type: integer + paused: + description: Represents whether any actions on the underlaying managed + objects are being performed. Only delete actions will be performed. + type: boolean + replicas: + description: Total number of non-terminated pods targeted by this Prometheus + deployment (their labels match the selector). + format: int32 + type: integer + unavailableReplicas: + description: Total number of unavailable pods targeted by this Prometheus + deployment. + format: int32 + type: integer + updatedReplicas: + description: Total number of non-terminated pods targeted by this Prometheus + deployment that have the desired version spec. + format: int32 + type: integer + required: + - paused + - replicas + - updatedReplicas + - availableReplicas + - unavailableReplicas + version: v1 diff --git a/manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76a78998eed52ddee08b248274ff098e6231ece2 --- /dev/null +++ b/manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml @@ -0,0 +1,343 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: prometheusrules.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: PrometheusRule + plural: prometheusrules + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources must have, + which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored with + a resource that may be set by external tools to store and retrieve + arbitrary metadata. They are not queryable and should be preserved + when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. This + is used to distinguish resources with same name and namespace in different + clusters. This field is not set anywhere right now and apiserver is + going to ignore it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of the + factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set when + deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of the + factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the registry. + Each entry is an identifier for the responsible component that will + remove the entry from the list. If the deletionTimestamp of the object + is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation of + the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the initializers + struct will be set to nil and the object is considered as initialized + and visible to all clients. + items: + description: Initializer is information about an initializer that + has not yet completed. + properties: + name: + description: name of the process that is responsible for initializing + this object. + type: string + required: + - name + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, 0 if + not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object defines + what attributes will be set. Clients must ignore fields that + do not match the defined type of each attribute, and should + assume that any attribute may be empty, invalid, or under + defined. + properties: + causes: + description: The Causes array includes more details associated + with the StatusReason failure. Not all StatusReasons may + provide detailed causes. + items: + description: StatusCause provides more information about + an api.Status failure, including cases when multiple + errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the cause + of the error. This field may be presented as-is + to a reader. + type: string + reason: + description: A machine-readable description of the + cause of the error. If this value is empty there + is no information available. + type: string + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may differ + from the requested resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single name + which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before the + operation should be retried. Some errors may indicate + the client must take an alternate action - for those errors + this field may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a single + resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status of this + operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic resources + must have, including lists and various status objects. A resource + may have only one of {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that the + server has more data available. The value is opaque and + may be used to issue another request to the endpoint that + served this list to retrieve the next set of available + objects. Continuing a consistent list may not be possible + if the server configuration has changed or more than a + few minutes have passed. The resourceVersion field returned + when using this continue value will be identical to the + value in the first response, unless you have received + this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients to + determine when objects have changed. Value must be treated + as opaque by clients and passed unmodified back to the + server. Populated by the system. Read-only. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + reason: + description: A machine-readable description of why this operation + is in the "Failure" status. If this value is empty there is + no information available. A Reason clarifies an HTTP status + code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" or + "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + required: + - pending + labels: + description: 'Map of string keys and values that can be used to organize + and categorize (scope and select) objects. May match selectors of + replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels' + type: object + name: + description: 'Name must be unique within a namespace. Is required when + creating resources, although some resources may allow a client to + request the generation of an appropriate name automatically. Name + is primarily intended for creation idempotence and configuration definition. + Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this list + will point to this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let you + identify an owning object. An owning object must be in the same + namespace as the dependent, or be cluster-scoped, so there is no + namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. To + set this field, a user needs "delete" permission of the owner, + otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated by + the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + spec: + description: PrometheusRuleSpec contains specification parameters for a + Rule. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + type: string + name: + type: string + rules: + items: + description: Rule describes an alerting or recording rule. + properties: + alert: + type: string + annotations: + type: object + expr: + anyOf: + - type: string + - type: integer + for: + type: string + labels: + type: object + record: + type: string + required: + - expr + type: array + required: + - name + - rules + type: array + version: v1 diff --git a/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml b/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..431bde39a0c9404225dafdbc844eef13b27ce4e2 --- /dev/null +++ b/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml @@ -0,0 +1,291 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: servicemonitors.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: ServiceMonitor + plural: servicemonitors + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: ServiceMonitorSpec contains specification parameters for a + ServiceMonitor. + properties: + endpoints: + description: A list of endpoints allowed as part of this ServiceMonitor. + items: + description: Endpoint defines a scrapeable endpoint serving Prometheus + metrics. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + bearerTokenFile: + description: File to read bearer token for scraping targets. + type: string + honorLabels: + description: HonorLabels chooses the metric's labels on collisions + with target labels. + type: boolean + interval: + description: Interval at which metrics should be scraped + type: string + metricRelabelings: + description: MetricRelabelConfigs to apply to samples before ingestion. + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + `<metric_relabel_configs>`-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: array + params: + description: Optional HTTP URL parameters + type: object + path: + description: HTTP path to scrape for metrics. + type: string + port: + description: Name of the service port this endpoint refers to. + Mutually exclusive with targetPort. + type: string + proxyUrl: + description: ProxyURL eg http://proxyserver:2195 Directs scrapes + to proxy through this endpoint. + type: string + relabelings: + description: 'RelabelConfigs to apply to samples before ingestion. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + `<metric_relabel_configs>`-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: array + scheme: + description: HTTP scheme to use for scraping. + type: string + scrapeTimeout: + description: Timeout after which the scrape is ended + type: string + targetPort: + anyOf: + - type: string + - type: integer + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: array + jobLabel: + description: The label to use to retrieve the job name from. + type: string + namespaceSelector: + description: NamespaceSelector is a selector for selecting either all + namespaces or a list of namespaces. + properties: + any: + description: Boolean describing whether all namespaces are selected + in contrast to a list restricting them. + type: boolean + matchNames: + description: List of namespace names. + items: + type: string + type: array + podTargetLabels: + description: PodTargetLabels transfers labels on the Kubernetes Pod + onto the target. + items: + type: string + type: array + sampleLimit: + description: SampleLimit defines per-scrape limit on number of scraped + samples that will be accepted. + format: int64 + type: integer + selector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + targetLabels: + description: TargetLabels transfers labels on the Kubernetes Service + onto the target. + items: + type: string + type: array + required: + - endpoints + - selector + version: v1 diff --git a/manifests/0prometheus-operator-clusterRole.yaml b/manifests/0prometheus-operator-clusterRole.yaml new file mode 100644 index 0000000000000000000000000000000000000000..123f78e93b5dfefe9b261c2a7b8af65d0e942ab2 --- /dev/null +++ b/manifests/0prometheus-operator-clusterRole.yaml @@ -0,0 +1,68 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-operator +rules: +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - '*' +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - prometheuses + - prometheuses/finalizers + - alertmanagers/finalizers + - servicemonitors + - prometheusrules + verbs: + - '*' +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - '*' +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - delete +- apiGroups: + - "" + resources: + - services + - services/finalizers + - endpoints + verbs: + - get + - create + - update + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch diff --git a/manifests/0prometheus-operator-clusterRoleBinding.yaml b/manifests/0prometheus-operator-clusterRoleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f1b4d36ca2d94b20ab75794e37849845c0b71d4 --- /dev/null +++ b/manifests/0prometheus-operator-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-operator +subjects: +- kind: ServiceAccount + name: prometheus-operator + namespace: monitoring diff --git a/manifests/0prometheus-operator-deployment.yaml b/manifests/0prometheus-operator-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1ace87ec59d3b4045ed33bcbb64dd993f7b1d8f --- /dev/null +++ b/manifests/0prometheus-operator-deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + labels: + k8s-app: prometheus-operator + name: prometheus-operator + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: prometheus-operator + template: + metadata: + labels: + k8s-app: prometheus-operator + spec: + containers: + - args: + - --kubelet-service=kube-system/kubelet + - --logtostderr=true + - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.29.0 + image: quay.io/coreos/prometheus-operator:v0.29.0 + name: prometheus-operator + ports: + - containerPort: 8080 + name: http + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: prometheus-operator diff --git a/manifests/0prometheus-operator-service.yaml b/manifests/0prometheus-operator-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5231b337691868ab7cae993ea2584707645be909 --- /dev/null +++ b/manifests/0prometheus-operator-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: prometheus-operator + name: prometheus-operator + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 8080 + targetPort: http + selector: + k8s-app: prometheus-operator diff --git a/manifests/0prometheus-operator-serviceAccount.yaml b/manifests/0prometheus-operator-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11898a6fbef1c3e2e47f4f554967e5d5651c808c --- /dev/null +++ b/manifests/0prometheus-operator-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-operator + namespace: monitoring diff --git a/manifests/0prometheus-operator-serviceMonitor.yaml b/manifests/0prometheus-operator-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14f402fb3a86043fa894d99801bb70a1f74946a5 --- /dev/null +++ b/manifests/0prometheus-operator-serviceMonitor.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: prometheus-operator + name: prometheus-operator + namespace: monitoring +spec: + endpoints: + - honorLabels: true + port: http + selector: + matchLabels: + k8s-app: prometheus-operator diff --git a/manifests/alertmanager-alertmanager.yaml b/manifests/alertmanager-alertmanager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93c52a859388fec43c976036339e152050e4581f --- /dev/null +++ b/manifests/alertmanager-alertmanager.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + labels: + alertmanager: main + name: main + namespace: monitoring +spec: + baseImage: quay.io/prometheus/alertmanager + nodeSelector: + beta.kubernetes.io/os: linux + replicas: 3 + securityContext: + fsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + serviceAccountName: alertmanager-main + version: v0.16.1 diff --git a/manifests/alertmanager-secret.yaml b/manifests/alertmanager-secret.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ee9c606e1a40273144c81d3ccc3fada567a0499 --- /dev/null +++ b/manifests/alertmanager-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +data: + alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg== +kind: Secret +metadata: + name: alertmanager-main + namespace: monitoring +type: Opaque diff --git a/manifests/alertmanager-service.yaml b/manifests/alertmanager-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df4c9ff5d490296d14511caa63fe89cfa3f3c927 --- /dev/null +++ b/manifests/alertmanager-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + alertmanager: main + name: alertmanager-main + namespace: monitoring +spec: + ports: + - name: web + port: 9093 + targetPort: web + selector: + alertmanager: main + app: alertmanager + sessionAffinity: ClientIP diff --git a/manifests/alertmanager-serviceAccount.yaml b/manifests/alertmanager-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c06d5e40cb4b3d99ad5388af1bb401458b63213 --- /dev/null +++ b/manifests/alertmanager-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alertmanager-main + namespace: monitoring diff --git a/manifests/alertmanager-serviceMonitor.yaml b/manifests/alertmanager-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..548af0d6dde9646f2add57c915d9fe25524971aa --- /dev/null +++ b/manifests/alertmanager-serviceMonitor.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: alertmanager + name: alertmanager + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: web + selector: + matchLabels: + alertmanager: main diff --git a/manifests/grafana-dashboardDatasources.yaml b/manifests/grafana-dashboardDatasources.yaml new file mode 100644 index 0000000000000000000000000000000000000000..446c686459794085b492f306ecf9b7de4cb924d5 --- /dev/null +++ b/manifests/grafana-dashboardDatasources.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +data: + prometheus.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0= +kind: Secret +metadata: + name: grafana-datasources + namespace: monitoring +type: Opaque diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d07d6a06a5c467c43bc578b2bf93f7992793f9dd --- /dev/null +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -0,0 +1,10381 @@ +apiVersion: v1 +items: +- apiVersion: v1 + data: + k8s-cluster-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Swap I/O)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Transmitted)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Dropped)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Capacity", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / USE Method / Cluster", + "uid": "a6e7d1362e1ddbb79db21d5bb40d7137", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-cluster-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + k8s-node-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Memory", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Swap IO", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Swap I/O)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Transmitted)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Dropped)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Net", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "node", + "multi": false, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / USE Method / Node", + "uid": "4ac4f123aae0ff6dbaf4f4f66120033b", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-node-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests by Namespace", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Requests", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod_name}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"}) by (pod_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod_name}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-namespace + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cluster=\"$cluster\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}} (RSS)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}} (Cache)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}} (Swap)", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-pod + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "workload", + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "type", + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workload + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workloads-namespace + namespace: monitoring +- apiVersion: v1 + data: + nodes.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(node_load1{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 1m", + "refId": "A" + }, + { + "expr": "max(node_load5{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 5m", + "refId": "B" + }, + { + "expr": "max(node_load15{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 15m", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cpu}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Usage Per Core", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": "true", + "avg": "true", + "current": "true", + "max": "false", + "min": "false", + "rightSide": "true", + "show": "true", + "total": "false", + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ cpu }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "CPU Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "max(node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "max(node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "max(node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Memory Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(rate(node_disk_read_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "max(rate(node_disk_written_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "written", + "refId": "B" + }, + { + "expr": "max(rate(node_disk_io_time_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes used", + "refId": "A" + }, + { + "expr": "max(node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes free", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 13, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(\n (\n (\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_boot_time_seconds{cluster=\"$cluster\", job=\"node-exporter\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Nodes", + "uid": "fa49a4706d07a042595b664c87fb33ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} - kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"}) / kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ Usage }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} / kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ Usage }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring +- apiVersion: v1 + data: + pods.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container_name }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container_name }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [ + + ], + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Pods", + "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-pods + namespace: monitoring +- apiVersion: v1 + data: + statefulset.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "CPU", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Memory", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "Bps", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Replicas", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Replicas of current version", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Observed Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 8, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Metadata Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas specified", + "refId": "A" + }, + { + "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas created", + "refId": "B" + }, + { + "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "ready", + "refId": "C" + }, + { + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas of current version", + "refId": "D" + }, + { + "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "E" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Name", + "multi": false, + "name": "statefulset", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", namespace=\"$namespace\"}, statefulset)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / StatefulSets", + "uid": "a31c1f46e6f727cb37c0d731a7245005", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-statefulset + namespace: monitoring +kind: ConfigMapList diff --git a/manifests/grafana-dashboardSources.yaml b/manifests/grafana-dashboardSources.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8b401a73eba03ebacc2bf2d0a6474800fe3fe37 --- /dev/null +++ b/manifests/grafana-dashboardSources.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + dashboards.yaml: |- + { + "apiVersion": 1, + "providers": [ + { + "folder": "", + "name": "0", + "options": { + "path": "/grafana-dashboard-definitions/0" + }, + "orgId": 1, + "type": "file" + } + ] + } +kind: ConfigMap +metadata: + name: grafana-dashboards + namespace: monitoring diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18133259c7f70b7996bd57dcce942b78036cff7e --- /dev/null +++ b/manifests/grafana-deployment.yaml @@ -0,0 +1,125 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + labels: + app: grafana + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + containers: + - image: grafana/grafana:6.0.1 + name: grafana + ports: + - containerPort: 3000 + name: http + readinessProbe: + httpGet: + path: /api/health + port: http + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + readOnly: false + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + - mountPath: /etc/grafana/provisioning/dashboards + name: grafana-dashboards + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use + name: grafana-dashboard-k8s-cluster-rsrc-use + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use + name: grafana-dashboard-k8s-node-rsrc-use + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster + name: grafana-dashboard-k8s-resources-cluster + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace + name: grafana-dashboard-k8s-resources-namespace + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod + name: grafana-dashboard-k8s-resources-pod + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload + name: grafana-dashboard-k8s-resources-workload + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace + name: grafana-dashboard-k8s-resources-workloads-namespace + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/nodes + name: grafana-dashboard-nodes + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/pods + name: grafana-dashboard-pods + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/statefulset + name: grafana-dashboard-statefulset + readOnly: false + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: grafana + volumes: + - emptyDir: {} + name: grafana-storage + - name: grafana-datasources + secret: + secretName: grafana-datasources + - configMap: + name: grafana-dashboards + name: grafana-dashboards + - configMap: + name: grafana-dashboard-k8s-cluster-rsrc-use + name: grafana-dashboard-k8s-cluster-rsrc-use + - configMap: + name: grafana-dashboard-k8s-node-rsrc-use + name: grafana-dashboard-k8s-node-rsrc-use + - configMap: + name: grafana-dashboard-k8s-resources-cluster + name: grafana-dashboard-k8s-resources-cluster + - configMap: + name: grafana-dashboard-k8s-resources-namespace + name: grafana-dashboard-k8s-resources-namespace + - configMap: + name: grafana-dashboard-k8s-resources-pod + name: grafana-dashboard-k8s-resources-pod + - configMap: + name: grafana-dashboard-k8s-resources-workload + name: grafana-dashboard-k8s-resources-workload + - configMap: + name: grafana-dashboard-k8s-resources-workloads-namespace + name: grafana-dashboard-k8s-resources-workloads-namespace + - configMap: + name: grafana-dashboard-nodes + name: grafana-dashboard-nodes + - configMap: + name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage + - configMap: + name: grafana-dashboard-pods + name: grafana-dashboard-pods + - configMap: + name: grafana-dashboard-statefulset + name: grafana-dashboard-statefulset diff --git a/manifests/grafana-service.yaml b/manifests/grafana-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3acdf1e8885db92ad511a0075bc26daba80fef6e --- /dev/null +++ b/manifests/grafana-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: grafana + name: grafana + namespace: monitoring +spec: + ports: + - name: http + port: 3000 + targetPort: http + selector: + app: grafana diff --git a/manifests/grafana-serviceAccount.yaml b/manifests/grafana-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ed3e031e7445fc347e27f8f7134d70c384f0be3 --- /dev/null +++ b/manifests/grafana-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana + namespace: monitoring diff --git a/manifests/grafana-serviceMonitor.yaml b/manifests/grafana-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ede266a5cfcf38e76818aa5571e009870a01e96 --- /dev/null +++ b/manifests/grafana-serviceMonitor.yaml @@ -0,0 +1,12 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: grafana + namespace: monitoring +spec: + endpoints: + - interval: 15s + port: http + selector: + matchLabels: + app: grafana diff --git a/manifests/kube-state-metrics-clusterRole.yaml b/manifests/kube-state-metrics-clusterRole.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b939df6843ac424b2adfb3863d9f1227a41c1003 --- /dev/null +++ b/manifests/kube-state-metrics-clusterRole.yaml @@ -0,0 +1,76 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch diff --git a/manifests/kube-state-metrics-clusterRoleBinding.yaml b/manifests/kube-state-metrics-clusterRoleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a8f3111abb8f0f418960ff0cdd56aaf95037076 --- /dev/null +++ b/manifests/kube-state-metrics-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: monitoring diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d57ea12d0ea1db86f9d77d47a49735f935a9d33b --- /dev/null +++ b/manifests/kube-state-metrics-deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + labels: + app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: + labels: + app: kube-state-metrics + spec: + containers: + - args: + - --logtostderr + - --secure-listen-address=:8443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8081/ + image: quay.io/coreos/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-main + ports: + - containerPort: 8443 + name: https-main + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --logtostderr + - --secure-listen-address=:9443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8082/ + image: quay.io/coreos/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-self + ports: + - containerPort: 9443 + name: https-self + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --host=127.0.0.1 + - --port=8081 + - --telemetry-host=127.0.0.1 + - --telemetry-port=8082 + image: quay.io/coreos/kube-state-metrics:v1.5.0 + name: kube-state-metrics + resources: + limits: + cpu: 100m + memory: 150Mi + requests: + cpu: 100m + memory: 150Mi + - command: + - /pod_nanny + - --container=kube-state-metrics + - --cpu=100m + - --extra-cpu=2m + - --memory=150Mi + - --extra-memory=30Mi + - --threshold=5 + - --deployment=kube-state-metrics + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + image: k8s.gcr.io/addon-resizer:1.8.4 + name: addon-resizer + resources: + limits: + cpu: 50m + memory: 30Mi + requests: + cpu: 10m + memory: 30Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: kube-state-metrics diff --git a/manifests/kube-state-metrics-role.yaml b/manifests/kube-state-metrics-role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e03d889881fd2b0792e1a394d479e2c60c5a4cc2 --- /dev/null +++ b/manifests/kube-state-metrics-role.yaml @@ -0,0 +1,30 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kube-state-metrics + namespace: monitoring +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - extensions + resourceNames: + - kube-state-metrics + resources: + - deployments + verbs: + - get + - update +- apiGroups: + - apps + resourceNames: + - kube-state-metrics + resources: + - deployments + verbs: + - get + - update diff --git a/manifests/kube-state-metrics-roleBinding.yaml b/manifests/kube-state-metrics-roleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c61143c24186d69d48af9a74171720de6e502cc --- /dev/null +++ b/manifests/kube-state-metrics-roleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kube-state-metrics + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics diff --git a/manifests/kube-state-metrics-service.yaml b/manifests/kube-state-metrics-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84927af32351b71e0deddfaf19ba0c575c449cb0 --- /dev/null +++ b/manifests/kube-state-metrics-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https-main + port: 8443 + targetPort: https-main + - name: https-self + port: 9443 + targetPort: https-self + selector: + app: kube-state-metrics diff --git a/manifests/kube-state-metrics-serviceAccount.yaml b/manifests/kube-state-metrics-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fff1028b442c69109cb8fa8e5f808f2a856838f8 --- /dev/null +++ b/manifests/kube-state-metrics-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: monitoring diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2100449da24436a7361342a2186a6d0cb96736d2 --- /dev/null +++ b/manifests/kube-state-metrics-serviceMonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + port: https-main + scheme: https + scrapeTimeout: 30s + tlsConfig: + insecureSkipVerify: true + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https-self + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: kube-state-metrics diff --git a/manifests/node-exporter-clusterRole.yaml b/manifests/node-exporter-clusterRole.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad783ae9bfdb3eb2c57e8d7bc9be580e75880bbf --- /dev/null +++ b/manifests/node-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: node-exporter +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/manifests/node-exporter-clusterRoleBinding.yaml b/manifests/node-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5a2050810d0976538a2aa08d4565d4a84bc4a07 --- /dev/null +++ b/manifests/node-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: node-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-exporter +subjects: +- kind: ServiceAccount + name: node-exporter + namespace: monitoring diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56e4b90b879b78416cfd8d5fc9475435c663ac18 --- /dev/null +++ b/manifests/node-exporter-daemonset.yaml @@ -0,0 +1,91 @@ +apiVersion: apps/v1beta2 +kind: DaemonSet +metadata: + labels: + app: node-exporter + name: node-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + labels: + app: node-exporter + spec: + containers: + - args: + - --web.listen-address=127.0.0.1:9100 + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/) + - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ + - --collector.ntp + image: quay.io/prometheus/node-exporter:v0.17.0 + name: node-exporter + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + - args: + - --logtostderr + - --secure-listen-address=$(IP):9100 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9100/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay.io/coreos/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9100 + hostPort: 9100 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + hostNetwork: true + hostPID: true + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: node-exporter + tolerations: + - effect: NoExecute + operator: Exists + - effect: NoSchedule + operator: Exists + volumes: + - hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root diff --git a/manifests/node-exporter-service.yaml b/manifests/node-exporter-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d728d76075c39ab333514e2e318ad5d9f7ab21d --- /dev/null +++ b/manifests/node-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: node-exporter + name: node-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9100 + targetPort: https + selector: + app: node-exporter diff --git a/manifests/node-exporter-serviceAccount.yaml b/manifests/node-exporter-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a03ac1641f68cbca54468aae86c79a20156058a --- /dev/null +++ b/manifests/node-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: monitoring diff --git a/manifests/node-exporter-serviceMonitor.yaml b/manifests/node-exporter-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..273d2748e45cb8a9bf63e4e0682c1d3e02e80e7e --- /dev/null +++ b/manifests/node-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: node-exporter + name: node-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: node-exporter diff --git a/manifests/prometheus-adapter-apiService.yaml b/manifests/prometheus-adapter-apiService.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a215efe4554c583ec42ec99df08c4b581972c23b --- /dev/null +++ b/manifests/prometheus-adapter-apiService.yaml @@ -0,0 +1,13 @@ +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + name: v1beta1.metrics.k8s.io +spec: + group: metrics.k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: prometheus-adapter + namespace: monitoring + version: v1beta1 + versionPriority: 100 diff --git a/manifests/prometheus-adapter-clusterRole.yaml b/manifests/prometheus-adapter-clusterRole.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a02d2bb050365e23d178dca0bd580ed6f6c94584 --- /dev/null +++ b/manifests/prometheus-adapter-clusterRole.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-adapter +rules: +- apiGroups: + - "" + resources: + - nodes + - namespaces + - pods + - services + verbs: + - get + - list + - watch diff --git a/manifests/prometheus-adapter-clusterRoleBinding.yaml b/manifests/prometheus-adapter-clusterRoleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e8f3da92b6b7b48ee08481d0a77b850c633c514 --- /dev/null +++ b/manifests/prometheus-adapter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-adapter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-adapter +subjects: +- kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4295b50f008f4a7ce5841445ab71f150dc8c4334 --- /dev/null +++ b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: resource-metrics:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: +- kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/manifests/prometheus-adapter-clusterRoleServerResources.yaml b/manifests/prometheus-adapter-clusterRoleServerResources.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcb914c364dbdef9bf57defdccfe2f0ead709eca --- /dev/null +++ b/manifests/prometheus-adapter-clusterRoleServerResources.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: resource-metrics-server-resources +rules: +- apiGroups: + - metrics.k8s.io + resources: + - '*' + verbs: + - '*' diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15434fc5ec8df846ed11c9342fad5e8b75c4b2a4 --- /dev/null +++ b/manifests/prometheus-adapter-configMap.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +data: + config.yaml: | + resourceRules: + cpu: + containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>) + nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + memory: + containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>) + nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod_name: + resource: pod + containerLabel: container_name + window: 1m +kind: ConfigMap +metadata: + name: adapter-config + namespace: monitoring diff --git a/manifests/prometheus-adapter-deployment.yaml b/manifests/prometheus-adapter-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0c2019806251deb1b2bc8e22bb957dc8b7b4cfa --- /dev/null +++ b/manifests/prometheus-adapter-deployment.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: prometheus-adapter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + name: prometheus-adapter + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + name: prometheus-adapter + spec: + containers: + - args: + - --cert-dir=/var/run/serving-cert + - --config=/etc/adapter/config.yaml + - --logtostderr=true + - --metrics-relist-interval=1m + - --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ + - --secure-port=6443 + image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1 + name: prometheus-adapter + ports: + - containerPort: 6443 + volumeMounts: + - mountPath: /tmp + name: tmpfs + readOnly: false + - mountPath: /var/run/serving-cert + name: volume-serving-cert + readOnly: false + - mountPath: /etc/adapter + name: config + readOnly: false + nodeSelector: + beta.kubernetes.io/os: linux + serviceAccountName: prometheus-adapter + volumes: + - emptyDir: {} + name: tmpfs + - emptyDir: {} + name: volume-serving-cert + - configMap: + name: adapter-config + name: config diff --git a/manifests/prometheus-adapter-roleBindingAuthReader.yaml b/manifests/prometheus-adapter-roleBindingAuthReader.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48c8f3253d484a15d96df4fc1265e03e6c273390 --- /dev/null +++ b/manifests/prometheus-adapter-roleBindingAuthReader.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: resource-metrics-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/manifests/prometheus-adapter-service.yaml b/manifests/prometheus-adapter-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e786e01c4aff3ce0e9ca3c3266b1dda817ca8935 --- /dev/null +++ b/manifests/prometheus-adapter-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + name: prometheus-adapter + name: prometheus-adapter + namespace: monitoring +spec: + ports: + - name: https + port: 443 + targetPort: 6443 + selector: + name: prometheus-adapter diff --git a/manifests/prometheus-adapter-serviceAccount.yaml b/manifests/prometheus-adapter-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7e7050391eecfc48a05e98fcd95d5b50f88a18e --- /dev/null +++ b/manifests/prometheus-adapter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-adapter + namespace: monitoring diff --git a/manifests/prometheus-clusterRole.yaml b/manifests/prometheus-clusterRole.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5c4598304eaf7fe0c183e34969ca734f793aa75 --- /dev/null +++ b/manifests/prometheus-clusterRole.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-k8s +rules: +- apiGroups: + - "" + resources: + - nodes/metrics + verbs: + - get +- nonResourceURLs: + - /metrics + verbs: + - get diff --git a/manifests/prometheus-clusterRoleBinding.yaml b/manifests/prometheus-clusterRoleBinding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..554bb6f8767c0a8a346614c685649321a176b640 --- /dev/null +++ b/manifests/prometheus-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-k8s +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-k8s +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring diff --git a/manifests/prometheus-prometheus.yaml b/manifests/prometheus-prometheus.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5952cc7fad4c78e4656750dd888c6d4ee3e4065 --- /dev/null +++ b/manifests/prometheus-prometheus.yaml @@ -0,0 +1,32 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + labels: + prometheus: k8s + name: k8s + namespace: monitoring +spec: + alerting: + alertmanagers: + - name: alertmanager-main + namespace: monitoring + port: web + baseImage: quay.io/prometheus/prometheus + nodeSelector: + beta.kubernetes.io/os: linux + replicas: 2 + resources: + requests: + memory: 400Mi + ruleSelector: + matchLabels: + prometheus: k8s + role: alert-rules + securityContext: + fsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + serviceAccountName: prometheus-k8s + serviceMonitorNamespaceSelector: {} + serviceMonitorSelector: {} + version: v2.7.2 diff --git a/manifests/prometheus-roleBindingConfig.yaml b/manifests/prometheus-roleBindingConfig.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec0129db5bf7f474b8aa124b63be72d50e97d667 --- /dev/null +++ b/manifests/prometheus-roleBindingConfig.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: prometheus-k8s-config + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s-config +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring diff --git a/manifests/prometheus-roleBindingSpecificNamespaces.yaml b/manifests/prometheus-roleBindingSpecificNamespaces.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7527f6aa1330f894adb83136444c8160ea92e90 --- /dev/null +++ b/manifests/prometheus-roleBindingSpecificNamespaces.yaml @@ -0,0 +1,42 @@ +apiVersion: rbac.authorization.k8s.io/v1 +items: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: prometheus-k8s + namespace: default + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s + subjects: + - kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: prometheus-k8s + namespace: kube-system + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s + subjects: + - kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: prometheus-k8s + namespace: monitoring + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s + subjects: + - kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring +kind: RoleBindingList diff --git a/manifests/prometheus-roleConfig.yaml b/manifests/prometheus-roleConfig.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f1cd043919a18141c2331c05836d5ef45d099eb --- /dev/null +++ b/manifests/prometheus-roleConfig.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: prometheus-k8s-config + namespace: monitoring +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get diff --git a/manifests/prometheus-roleSpecificNamespaces.yaml b/manifests/prometheus-roleSpecificNamespaces.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b920b886e3cfee0fe48648533df5206f277fe041 --- /dev/null +++ b/manifests/prometheus-roleSpecificNamespaces.yaml @@ -0,0 +1,51 @@ +apiVersion: rbac.authorization.k8s.io/v1 +items: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: prometheus-k8s + namespace: default + rules: + - apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: prometheus-k8s + namespace: kube-system + rules: + - apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: prometheus-k8s + namespace: monitoring + rules: + - apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch +kind: RoleList diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee1f21ba15df6732e1e085847c7a1a1c924db8a7 --- /dev/null +++ b/manifests/prometheus-rules.yaml @@ -0,0 +1,1102 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: prometheus-k8s-rules + namespace: monitoring +spec: + groups: + - name: k8s.rules + rules: + - expr: | + sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace) + record: namespace:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum by (namespace, pod_name, container_name) ( + rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m]) + ) + record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace) + record: namespace:container_memory_usage_bytes:sum + - expr: | + sum by (namespace, label_name) ( + sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace, pod_name) + * on (namespace, pod_name) group_left(label_name) + label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") + ) + record: namespace_name:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum by (namespace, label_name) ( + sum(container_memory_usage_bytes{job="kubelet",image!="", container_name!=""}) by (pod_name, namespace) + * on (namespace, pod_name) group_left(label_name) + label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") + ) + record: namespace_name:container_memory_usage_bytes:sum + - expr: | + sum by (namespace, label_name) ( + sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + * on (namespace, pod) group_left(label_name) + label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") + ) + record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + sum by (namespace, label_name) ( + sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + * on (namespace, pod) group_left(label_name) + label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") + ) + record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum + - expr: | + sum( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: deployment + record: mixin_pod_workload + - expr: | + sum( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: daemonset + record: mixin_pod_workload + - expr: | + sum( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: statefulset + record: mixin_pod_workload + - name: kube-scheduler.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile + - name: kube-apiserver.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - name: node.rules + rules: + - expr: sum(min(kube_pod_info) by (node)) + record: ':kube_pod_info_node_count:' + - expr: | + max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod) + record: 'node_namespace_pod:kube_pod_info:' + - expr: | + count by (node) (sum by (node, cpu) ( + node_cpu_seconds_total{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + )) + record: node:node_num_cpu:sum + - expr: | + 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) + record: :node_cpu_utilisation:avg1m + - expr: | + 1 - avg by (node) ( + rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info:) + record: node:node_cpu_utilisation:avg1m + - expr: | + node:node_cpu_utilisation:avg1m + * + node:node_num_cpu:sum + / + scalar(sum(node:node_num_cpu:sum)) + record: node:cluster_cpu_utilisation:ratio + - expr: | + sum(node_load1{job="node-exporter"}) + / + sum(node:node_num_cpu:sum) + record: ':node_cpu_saturation_load1:' + - expr: | + sum by (node) ( + node_load1{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + node:node_num_cpu:sum + record: 'node:node_cpu_saturation_load1:' + - expr: | + 1 - + sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: ':node_memory_utilisation:' + - expr: | + sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + record: :node_memory_MemFreeCachedBuffers_bytes:sum + - expr: | + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: :node_memory_MemTotal_bytes:sum + - expr: | + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_available:sum + - expr: | + sum by (node) ( + node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_total:sum + - expr: | + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + node:node_memory_bytes_total:sum + record: node:node_memory_utilisation:ratio + - expr: | + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + scalar(sum(node:node_memory_bytes_total:sum)) + record: node:cluster_memory_utilisation:ratio + - expr: | + 1e3 * sum( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + ) + record: :node_memory_swap_io_bytes:sum_rate + - expr: | + 1 - + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + sum by (node) ( + node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: 'node:node_memory_utilisation:' + - expr: | + 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) + record: 'node:node_memory_utilisation_2:' + - expr: | + 1e3 * sum by (node) ( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_swap_io_bytes:sum_rate + - expr: | + avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_utilisation:avg_irate + - expr: | + avg by (node) ( + irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_utilisation:avg_irate + - expr: | + avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_saturation:avg_irate + - expr: | + avg by (node) ( + irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_saturation:avg_irate + - expr: | + max by (namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} + - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_usage:' + - expr: | + max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_avail:' + - expr: | + sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_utilisation:sum_irate + - expr: | + sum by (node) ( + (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_utilisation:sum_irate + - expr: | + sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_saturation:sum_irate + - expr: | + sum by (node) ( + (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_saturation:sum_irate + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_total:' + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_free:' + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY + (instance) + record: instance:node_cpu:rate:sum + - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) + BY (instance) + record: instance:node_filesystem_usage:sum + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT + (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) + BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) + record: cluster:node_cpu:sum_rate5m + - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) + BY (instance, cpu)) + record: cluster:node_cpu:ratio + - name: kubernetes-absent + rules: + - alert: AlertmanagerDown + annotations: + message: Alertmanager has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown + expr: | + absent(up{job="alertmanager-main",namespace="monitoring"} == 1) + for: 15m + labels: + severity: critical + - alert: CoreDNSDown + annotations: + message: CoreDNS has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-corednsdown + expr: | + absent(up{job="kube-dns"} == 1) + for: 15m + labels: + severity: critical + - alert: KubeAPIDown + annotations: + message: KubeAPI has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown + expr: | + absent(up{job="apiserver"} == 1) + for: 15m + labels: + severity: critical + - alert: KubeControllerManagerDown + annotations: + message: KubeControllerManager has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown + expr: | + absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical + - alert: KubeSchedulerDown + annotations: + message: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown + expr: | + absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsDown + annotations: + message: KubeStateMetrics has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown + expr: | + absent(up{job="kube-state-metrics"} == 1) + for: 15m + labels: + severity: critical + - alert: KubeletDown + annotations: + message: Kubelet has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown + expr: | + absent(up{job="kubelet"} == 1) + for: 15m + labels: + severity: critical + - alert: NodeExporterDown + annotations: + message: NodeExporter has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown + expr: | + absent(up{job="node-exporter"} == 1) + for: 15m + labels: + severity: critical + - alert: PrometheusDown + annotations: + message: Prometheus has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown + expr: | + absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) + for: 15m + labels: + severity: critical + - alert: PrometheusOperatorDown + annotations: + message: PrometheusOperator has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown + expr: | + absent(up{job="prometheus-operator",namespace="monitoring"} == 1) + for: 15m + labels: + severity: critical + - name: kubernetes-apps + rules: + - alert: KubePodCrashLooping + annotations: + message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container + }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping + expr: | + rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 + for: 1h + labels: + severity: critical + - alert: KubePodNotReady + annotations: + message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready + state for longer than an hour. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready + expr: | + sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0 + for: 1h + labels: + severity: critical + - alert: KubeDeploymentGenerationMismatch + annotations: + message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment + }} does not match, this indicates that the Deployment has failed but has + not been rolled back. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch + expr: | + kube_deployment_status_observed_generation{job="kube-state-metrics"} + != + kube_deployment_metadata_generation{job="kube-state-metrics"} + for: 15m + labels: + severity: critical + - alert: KubeDeploymentReplicasMismatch + annotations: + message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not + matched the expected number of replicas for longer than an hour. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch + expr: | + kube_deployment_spec_replicas{job="kube-state-metrics"} + != + kube_deployment_status_replicas_available{job="kube-state-metrics"} + for: 1h + labels: + severity: critical + - alert: KubeStatefulSetReplicasMismatch + annotations: + message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has + not matched the expected number of replicas for longer than 15 minutes. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch + expr: | + kube_statefulset_status_replicas_ready{job="kube-state-metrics"} + != + kube_statefulset_status_replicas{job="kube-state-metrics"} + for: 15m + labels: + severity: critical + - alert: KubeStatefulSetGenerationMismatch + annotations: + message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset + }} does not match, this indicates that the StatefulSet has failed but has + not been rolled back. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch + expr: | + kube_statefulset_status_observed_generation{job="kube-state-metrics"} + != + kube_statefulset_metadata_generation{job="kube-state-metrics"} + for: 15m + labels: + severity: critical + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: + message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update + has not been rolled out. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout + expr: | + max without (revision) ( + kube_statefulset_status_current_revision{job="kube-state-metrics"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics"} + ) + for: 15m + labels: + severity: critical + - alert: KubeDaemonSetRolloutStuck + annotations: + message: Only {{ $value }}% of the desired Pods of DaemonSet {{ $labels.namespace + }}/{{ $labels.daemonset }} are scheduled and ready. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck + expr: | + kube_daemonset_status_number_ready{job="kube-state-metrics"} + / + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100 + for: 15m + labels: + severity: critical + - alert: KubeDaemonSetNotScheduled + annotations: + message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are not scheduled.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled + expr: | + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} + - + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0 + for: 10m + labels: + severity: warning + - alert: KubeDaemonSetMisScheduled + annotations: + message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are running where they are not supposed to run.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled + expr: | + kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0 + for: 10m + labels: + severity: warning + - alert: KubeCronJobRunning + annotations: + message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more + than 1h to complete. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning + expr: | + time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600 + for: 1h + labels: + severity: warning + - alert: KubeJobCompletion + annotations: + message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more + than one hour to complete. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion + expr: | + kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 + for: 1h + labels: + severity: warning + - alert: KubeJobFailed + annotations: + message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed + expr: | + kube_job_status_failed{job="kube-state-metrics"} > 0 + for: 1h + labels: + severity: warning + - name: kubernetes-resources + rules: + - alert: KubeCPUOvercommit + annotations: + message: Cluster has overcommitted CPU resource requests for Pods and cannot + tolerate node failure. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit + expr: | + sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum) + / + sum(node:node_num_cpu:sum) + > + (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning + - alert: KubeMemOvercommit + annotations: + message: Cluster has overcommitted memory resource requests for Pods and cannot + tolerate node failure. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit + expr: | + sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) + / + sum(node_memory_MemTotal_bytes) + > + (count(node:node_num_cpu:sum)-1) + / + count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning + - alert: KubeCPUOvercommit + annotations: + message: Cluster has overcommitted CPU resource requests for Namespaces. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit + expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(node:node_num_cpu:sum) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeMemOvercommit + annotations: + message: Cluster has overcommitted memory resource requests for Namespaces. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit + expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeQuotaExceeded + annotations: + message: Namespace {{ $labels.namespace }} is using {{ printf "%0.0f" $value + }}% of its {{ $labels.resource }} quota. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded + expr: | + 100 * kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 90 + for: 15m + labels: + severity: warning + - alert: CPUThrottlingHigh + annotations: + message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace + }} for container {{ $labels.container_name }} in pod {{ $labels.pod_name + }}.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh + expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!=\"\", + }[5m])) by (container_name, pod_name, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) + by (container_name, pod_name, namespace)\n > 25 \n" + for: 15m + labels: + severity: warning + - name: kubernetes-storage + rules: + - alert: KubePersistentVolumeUsageCritical + annotations: + message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} is only {{ printf "%0.2f" $value + }}% free. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical + expr: | + 100 * kubelet_volume_stats_available_bytes{job="kubelet"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet"} + < 3 + for: 1m + labels: + severity: critical + - alert: KubePersistentVolumeFullInFourDays + annotations: + message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} is expected to fill up within four + days. Currently {{ printf "%0.2f" $value }}% is available. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays + expr: | + 100 * ( + kubelet_volume_stats_available_bytes{job="kubelet"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet"} + ) < 15 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 + for: 5m + labels: + severity: critical + - alert: KubePersistentVolumeErrors + annotations: + message: The persistent volume {{ $labels.persistentvolume }} has status {{ + $labels.phase }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors + expr: | + kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 + for: 5m + labels: + severity: critical + - name: kubernetes-system + rules: + - alert: KubeNodeNotReady + annotations: + message: '{{ $labels.node }} has been unready for more than an hour.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready + expr: | + kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 1h + labels: + severity: warning + - alert: KubeVersionMismatch + annotations: + message: There are {{ $value }} different semantic versions of Kubernetes + components running. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch + expr: | + count(count by (gitVersion) (label_replace(kubernetes_build_info{job!="kube-dns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 + for: 1h + labels: + severity: warning + - alert: KubeClientErrors + annotations: + message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance + }}' is experiencing {{ printf "%0.0f" $value }}% errors.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors + expr: | + (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) + / + sum(rate(rest_client_requests_total[5m])) by (instance, job)) + * 100 > 1 + for: 15m + labels: + severity: warning + - alert: KubeClientErrors + annotations: + message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance + }}' is experiencing {{ printf "%0.0f" $value }} errors / second. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors + expr: | + sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1 + for: 15m + labels: + severity: warning + - alert: KubeletTooManyPods + annotations: + message: Kubelet {{ $labels.instance }} is running {{ $value }} Pods, close + to the limit of 110. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods + expr: | + kubelet_running_pod_count{job="kubelet"} > 110 * 0.9 + for: 15m + labels: + severity: warning + - alert: KubeAPILatencyHigh + annotations: + message: The API server has a 99th percentile latency of {{ $value }} seconds + for {{ $labels.verb }} {{ $labels.resource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh + expr: | + cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 + for: 10m + labels: + severity: warning + - alert: KubeAPILatencyHigh + annotations: + message: The API server has a 99th percentile latency of {{ $value }} seconds + for {{ $labels.verb }} {{ $labels.resource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh + expr: | + cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 + for: 10m + labels: + severity: critical + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 + for: 10m + labels: + severity: critical + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 + for: 10m + labels: + severity: warning + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 + for: 10m + labels: + severity: critical + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 + for: 10m + labels: + severity: warning + - alert: KubeClientCertificateExpiration + annotations: + message: A client certificate used to authenticate to the apiserver is expiring + in less than 7.0 days. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration + expr: | + apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + labels: + severity: warning + - alert: KubeClientCertificateExpiration + annotations: + message: A client certificate used to authenticate to the apiserver is expiring + in less than 24.0 hours. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration + expr: | + apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + labels: + severity: critical + - name: alertmanager.rules + rules: + - alert: AlertmanagerConfigInconsistent + annotations: + message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` + are out of sync. + expr: | + count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + for: 5m + labels: + severity: critical + - alert: AlertmanagerFailedReload + annotations: + message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace + }}/{{ $labels.pod}}. + expr: | + alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0 + for: 10m + labels: + severity: warning + - alert: AlertmanagerMembersInconsistent + annotations: + message: Alertmanager has not found all other members of the cluster. + expr: | + alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"} + != on (service) GROUP_LEFT() + count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}) + for: 5m + labels: + severity: critical + - name: general.rules + rules: + - alert: TargetDown + annotations: + message: '{{ $value }}% of the {{ $labels.job }} targets are down.' + expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10 + for: 10m + labels: + severity: warning + - alert: Watchdog + annotations: + message: | + This is an alert meant to ensure that the entire alerting pipeline is functional. + This alert is always firing, therefore it should always be firing in Alertmanager + and always fire against a receiver. There are integrations with various notification + mechanisms that send a notification when this alert is not firing. For example the + "DeadMansSnitch" integration in PagerDuty. + expr: vector(1) + labels: + severity: none + - name: kube-prometheus-node-alerting.rules + rules: + - alert: NodeDiskRunningFull + annotations: + message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace + }}/{{ $labels.pod }} will be full within the next 24 hours. + expr: | + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0) + for: 30m + labels: + severity: warning + - alert: NodeDiskRunningFull + annotations: + message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace + }}/{{ $labels.pod }} will be full within the next 2 hours. + expr: | + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0) + for: 10m + labels: + severity: critical + - name: node-time + rules: + - alert: ClockSkewDetected + annotations: + message: Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod + }}. Ensure NTP is configured correctly on this host. + expr: | + node_ntp_offset_seconds{job="node-exporter"} < -0.03 or node_ntp_offset_seconds{job="node-exporter"} > 0.03 + for: 2m + labels: + severity: warning + - name: node-network + rules: + - alert: NetworkReceiveErrors + annotations: + message: Network interface "{{ $labels.device }}" showing receive errors on + node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning + - alert: NetworkTransmitErrors + annotations: + message: Network interface "{{ $labels.device }}" showing transmit errors + on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning + - alert: NodeNetworkInterfaceDown + annotations: + message: Network interface "{{ $labels.device }}" down on node-exporter {{ + $labels.namespace }}/{{ $labels.pod }}" + expr: | + node_network_up{job="node-exporter",device!~"veth.+"} == 0 + for: 2m + labels: + severity: warning + - alert: NodeNetworkInterfaceFlapping + annotations: + message: Network interface "{{ $labels.device }}" changing it's up status + often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning + - name: prometheus.rules + rules: + - alert: PrometheusConfigReloadFailed + annotations: + description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}} + summary: Reloading Prometheus' configuration failed + expr: | + prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"} == 0 + for: 10m + labels: + severity: warning + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ + $labels.pod}} + summary: Prometheus' alert notification queue is running full + expr: | + predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"} + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ + $labels.pod}} to Alertmanager {{$labels.Alertmanager}} + summary: Errors while sending alert from Prometheus + expr: | + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.01 + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ + $labels.pod}} to Alertmanager {{$labels.Alertmanager}} + summary: Errors while sending alerts from Prometheus + expr: | + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.03 + for: 10m + labels: + severity: critical + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected + to any Alertmanagers + summary: Prometheus is not connected to any Alertmanagers + expr: | + prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"} < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + reload failures over the last four hours.' + summary: Prometheus has issues reloading data blocks from disk + expr: | + increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + compaction failures over the last four hours.' + summary: Prometheus has issues compacting sample blocks + expr: | + increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBWALCorruptions + annotations: + description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead + log (WAL).' + summary: Prometheus write-ahead log is corrupted + expr: | + prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting + samples. + summary: Prometheus isn't ingesting samples + expr: | + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 + for: 10m + labels: + severity: warning + - alert: PrometheusTargetScrapesDuplicate + annotations: + description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected + due to duplicate timestamps but different values' + summary: Prometheus has many samples rejected + expr: | + increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 10m + labels: + severity: warning + - name: prometheus-operator + rules: + - alert: PrometheusOperatorReconcileErrors + annotations: + message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace + }} Namespace. + expr: | + rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 + for: 10m + labels: + severity: warning + - alert: PrometheusOperatorNodeLookupErrors + annotations: + message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. + expr: | + rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 + for: 10m + labels: + severity: warning diff --git a/manifests/prometheus-service.yaml b/manifests/prometheus-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f61e88ab777a70b7ff106cceb10a42556949e2b --- /dev/null +++ b/manifests/prometheus-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + prometheus: k8s + name: prometheus-k8s + namespace: monitoring +spec: + ports: + - name: web + port: 9090 + targetPort: web + selector: + app: prometheus + prometheus: k8s + sessionAffinity: ClientIP diff --git a/manifests/prometheus-serviceAccount.yaml b/manifests/prometheus-serviceAccount.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e55fad675b8691f3aab29f152fdaa5416605745 --- /dev/null +++ b/manifests/prometheus-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-k8s + namespace: monitoring diff --git a/manifests/prometheus-serviceMonitor.yaml b/manifests/prometheus-serviceMonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7605dbeb6e4a4253db105a0a6faee1882880767 --- /dev/null +++ b/manifests/prometheus-serviceMonitor.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: prometheus + name: prometheus + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: web + selector: + matchLabels: + prometheus: k8s diff --git a/manifests/prometheus-serviceMonitorApiserver.yaml b/manifests/prometheus-serviceMonitorApiserver.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5dea38e401421cd37bba16f8a8368407ee991601 --- /dev/null +++ b/manifests/prometheus-serviceMonitorApiserver.yaml @@ -0,0 +1,37 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: apiserver + name: kube-apiserver + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + metricRelabelings: + - action: drop + regex: etcd_(debugging|disk|request|server).* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_controller_admission_latencies_seconds_.* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_step_admission_latencies_seconds_.* + sourceLabels: + - __name__ + port: https + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes diff --git a/manifests/prometheus-serviceMonitorCoreDNS.yaml b/manifests/prometheus-serviceMonitorCoreDNS.yaml new file mode 100644 index 0000000000000000000000000000000000000000..633aa18cf1d4d352e4f527b726a1127393da6f4d --- /dev/null +++ b/manifests/prometheus-serviceMonitorCoreDNS.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: coredns + name: coredns + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 15s + port: metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-dns diff --git a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml b/manifests/prometheus-serviceMonitorKubeControllerManager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..153a90da112806239eef66354daf4616583433d5 --- /dev/null +++ b/manifests/prometheus-serviceMonitorKubeControllerManager.yaml @@ -0,0 +1,23 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-controller-manager + name: kube-controller-manager + namespace: monitoring +spec: + endpoints: + - interval: 30s + metricRelabelings: + - action: drop + regex: etcd_(debugging|disk|request|server).* + sourceLabels: + - __name__ + port: http-metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-controller-manager diff --git a/manifests/prometheus-serviceMonitorKubeScheduler.yaml b/manifests/prometheus-serviceMonitorKubeScheduler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00db0e474023495ed4a825d61674367b1dfd7b8 --- /dev/null +++ b/manifests/prometheus-serviceMonitorKubeScheduler.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-scheduler + name: kube-scheduler + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: http-metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-scheduler diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91da377a476c45bd2df296add7eaab6a64ee07b1 --- /dev/null +++ b/manifests/prometheus-serviceMonitorKubelet.yaml @@ -0,0 +1,36 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kubelet + name: kubelet + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + port: https-metrics + scheme: https + tlsConfig: + insecureSkipVerify: true + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + metricRelabelings: + - action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + sourceLabels: + - __name__ + path: /metrics/cadvisor + port: https-metrics + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kubelet diff --git a/sync-to-internal-registry.jsonnet b/sync-to-internal-registry.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..b7c85571f5617cf5320ba6218689281df30a7143 --- /dev/null +++ b/sync-to-internal-registry.jsonnet @@ -0,0 +1,30 @@ +local kp = import 'kube-prometheus/kube-prometheus.libsonnet'; +local l = import 'kube-prometheus/lib/lib.libsonnet'; +local config = kp._config; + +local makeImages(config) = [ + { + name: config.imageRepos[image], + tag: config.versions[image], + } + for image in std.objectFields(config.imageRepos) +]; + +local upstreamImage(image) = '%s:%s' % [image.name, image.tag]; +local downstreamImage(registry, image) = '%s/%s:%s' % [registry, l.imageName(image.name), image.tag]; + +local pullPush(image, newRegistry) = [ + 'docker pull %s' % upstreamImage(image), + 'docker tag %s %s' % [upstreamImage(image), downstreamImage(newRegistry, image)], + 'docker push %s' % downstreamImage(newRegistry, image), +]; + +local images = makeImages(config); + +local output(repository) = std.flattenArrays([ + pullPush(image, repository) + for image in images +]); + +function(repository='my-registry.com/repository') + std.join('\n', output(repository)) diff --git a/test.sh b/test.sh new file mode 100755 index 0000000000000000000000000000000000000000..cfdf584c5439758ff91437d314ce7db631dc132c --- /dev/null +++ b/test.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -e +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail + + +for i in examples/jsonnet-snippets/*.jsonnet; do + [ -f "$i" ] || break + echo "Testing: ${i}" + echo "" + fileContent=$(<"$i") + snippet="local kp = $fileContent; + +$(<examples/jsonnet-build-snippet/build-snippet.jsonnet)" + echo "${snippet}" > "test.jsonnet" + echo "\`\`\`" + echo "${snippet}" + echo "\`\`\`" + echo "" + jsonnet -J vendor "test.jsonnet" > /dev/null + rm -rf "test.jsonnet" +done + +for i in examples/*.jsonnet; do + [ -f "$i" ] || break + echo "Testing: ${i}" + echo "" + echo "\`\`\`" + cat "${i}" + echo "\`\`\`" + echo "" + jsonnet -J vendor "${i}" > /dev/null +done diff --git a/tests/e2e/main_test.go b/tests/e2e/main_test.go new file mode 100644 index 0000000000000000000000000000000000000000..e63730a3df146eb16c30ad7ea9426c198bf4ba14 --- /dev/null +++ b/tests/e2e/main_test.go @@ -0,0 +1,118 @@ +// Copyright 2019 The prometheus-operator Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "log" + "os" + "testing" + "time" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +var promClient *prometheusClient + +func TestMain(m *testing.M) { + os.Exit(testMain(m)) +} + +// testMain circumvents the issue, that one can not call `defer` in TestMain, as +// `os.Exit` does not honor `defer` statements. For more details see: +// http://blog.englund.nu/golang,/testing/2017/03/12/using-defer-in-testmain.html +func testMain(m *testing.M) int { + kubeConfigPath, ok := os.LookupEnv("KUBECONFIG") + if !ok { + log.Fatal("failed to retrieve KUBECONFIG env var") + } + + config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + log.Fatal(err) + } + + kubeClient, err := kubernetes.NewForConfig(config) + if err != nil { + log.Fatal(errors.Wrap(err, "creating kubeClient failed")) + } + + promClient = newPrometheusClient(kubeClient) + + return m.Run() +} + +func TestQueryPrometheus(t *testing.T) { + t.Parallel() + queries := []struct { + query string + expectN int + }{ + { + // query: `up{job="node-exporter"} == 1`, + // expectN: 1, + // }, { + // query: `up{job="kubelet"} == 1`, + // expectN: 1, + // }, { + query: `up{job="apiserver"} == 1`, + expectN: 1, + // }, { + // query: `up{job="kube-state-metrics"} == 1`, + // expectN: 1, + }, { + query: `up{job="prometheus-k8s"} == 1`, + expectN: 1, + }, { + query: `up{job="prometheus-operator"} == 1`, + expectN: 1, + }, { + query: `up{job="alertmanager-main"} == 1`, + expectN: 2, + }, + } + + // Wait for pod to respond at queries at all. Then start verifying their results. + err := wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) { + _, err := promClient.query("up") + return err == nil, nil + }) + if err != nil { + t.Fatal(errors.Wrap(err, "wait for prometheus-k8s")) + } + + err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) { + defer t.Log("---------------------------\n") + + for _, q := range queries { + n, err := promClient.query(q.query) + if err != nil { + return false, err + } + if n < q.expectN { + // Don't return an error as targets may only become visible after a while. + t.Logf("expected at least %d results for %q but got %d", q.expectN, q.query, n) + return false, nil + } + t.Logf("query %q succeeded", q.query) + } + return true, nil + }) + if err != nil { + t.Fatal(err) + } +} diff --git a/tests/e2e/prometheus_client.go b/tests/e2e/prometheus_client.go new file mode 100644 index 0000000000000000000000000000000000000000..b87ce3e5fff8ca7f4b32d392304d2c007eb8703b --- /dev/null +++ b/tests/e2e/prometheus_client.go @@ -0,0 +1,52 @@ +// Copyright 2019 The prometheus-operator Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "k8s.io/client-go/kubernetes" + + "github.com/Jeffail/gabs" +) + +type prometheusClient struct { + kubeClient kubernetes.Interface +} + +func newPrometheusClient(kubeClient kubernetes.Interface) *prometheusClient { + return &prometheusClient{kubeClient} +} + +// Query makes a request against the Prometheus /api/v1/query endpoint. +func (c *prometheusClient) query(query string) (int, error) { + req := c.kubeClient.CoreV1().RESTClient().Get(). + Namespace("monitoring"). + Resource("pods"). + SubResource("proxy"). + Name("prometheus-k8s-0:9090"). + Suffix("/api/v1/query").Param("query", query) + + b, err := req.DoRaw() + if err != nil { + return 0, err + } + + res, err := gabs.ParseJSON(b) + if err != nil { + return 0, err + } + + n, err := res.ArrayCountP("data.result") + return n, err +} diff --git a/tests/e2e/travis-e2e.sh b/tests/e2e/travis-e2e.sh new file mode 100755 index 0000000000000000000000000000000000000000..d1149697928ac4323adccbad72f0d622eb495fff --- /dev/null +++ b/tests/e2e/travis-e2e.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# exit immediately when a command fails +set -e +# only exit with zero if all commands of the pipeline exit successfully +set -o pipefail +# error on unset variables +set -u +# print each command before executing it +set -x + +SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}") + +"${SCRIPT_DIR}"/../../../../scripts/create-minikube.sh + +( + cd "${SCRIPT_DIR}"/../.. || exit + kubectl apply -f manifests + KUBECONFIG=~/.kube/config make test-e2e +) + +"${SCRIPT_DIR}"/../../../../scripts/delete-minikube.sh