diff --git a/Makefile b/Makefile index 5f2eaa4eea593745bf88a54bd9fc9173ad03045c..994fa5ae6b2eb2853837d3127108458385750e21 100644 --- a/Makefile +++ b/Makefile @@ -34,9 +34,9 @@ generate: manifests **.md **.md: $(shell find examples) build.sh example.jsonnet $(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor` -manifests: vendor example.jsonnet build.sh +manifests: examples/kustomize.jsonnet vendor build.sh rm -rf manifests - ./build.sh ./examples/kustomize.jsonnet + ./build.sh $< vendor: jsonnetfile.json jsonnetfile.lock.json rm -rf vendor diff --git a/README.md b/README.md index 604b7e7e33fc88685f50be695a11d36412d14820..9a31c1277d4850fb147c6889cc6213a422bc2532 100644 --- a/README.md +++ b/README.md @@ -71,13 +71,13 @@ This adapter is an Extension API Server and Kubernetes needs to be have this fea ### minikube -In order to just try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command: +To try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command: ```shell -$ minikube delete && minikube start --kubernetes-version=v1.14.4 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0 +$ minikube delete && minikube start --kubernetes-version=v1.16.0 --memory=6g --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0 ``` -The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube: +The kube-prometheus stack includes a resource metrics API server, so the metrics-server addon is not necessary. Ensure the metrics-server addon is disabled on minikube: ```shell $ minikube addons disable metrics-server @@ -90,20 +90,23 @@ $ minikube addons disable metrics-server This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository). Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run: - * Simply create the stack: -```shell -$ kubectl create -f manifests/ - -# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding. -$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done -$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done + * Create the monitoring stack using the config in the `manifests` directory: -$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition). +```shell +# Create the namespace and CRDs, and then wait for them to be availble before creating the remaining resources +kubectl create -f manifests/setup +until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done +kubectl create -f manifests/ ``` +We create the namespace and CustomResourceDefinitions first to avoid race conditions when deploying the monitoring components. +Alternatively, the resources in both folders can be applied with a single command +`kubectl create -f manifests/setup -f manifests`, but it may be necessary to run the command multiple times for all components to +be created successfullly. + * And to teardown the stack: ```shell -$ kubectl delete -f manifests/ +kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup ``` ### Access the dashboards @@ -187,8 +190,13 @@ local kp = }, }; -{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + -{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator)) +} + +// serviceMonitor is separated so that it can be created after the CRDs are ready +{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + @@ -212,7 +220,7 @@ set -o pipefail # Make sure to start with a clean 'manifests' dir rm -rf manifests -mkdir manifests +mkdir -p manifests/setup # optional, but we would like to generate yaml, not json jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {} @@ -565,34 +573,34 @@ You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet ```jsonnet local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { - namespace: 'monitoring', - prometheus+:: { - namespaces+: ['my-namespace', 'my-second-namespace'], - } - }, + namespace: 'monitoring', prometheus+:: { - serviceMonitorMyNamespace: { - apiVersion: 'monitoring.coreos.com/v1', - kind: 'ServiceMonitor', - metadata: { - name: 'my-servicemonitor', - namespace: 'my-namespace', + namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, + prometheus+:: { + serviceMonitorMyNamespace: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'my-servicemonitor', + namespace: 'my-namespace', + }, + spec: { + jobLabel: 'app', + endpoints: [ + { + port: 'http-metrics', }, - spec: { - jobLabel: 'app', - endpoints: [ - { - port: 'http-metrics', - }, - ], - selector: { - matchLabels: { - 'app': 'myapp', - }, - }, + ], + selector: { + matchLabels: { + app: 'myapp', }, }, - }, + }, + }, + }, }; diff --git a/build.sh b/build.sh index f68cd4475df21548ce6f7b26eff574929f3043e7..0663889d83038e825061fcfa3e332b51df1a4711 100755 --- a/build.sh +++ b/build.sh @@ -9,7 +9,7 @@ set -o pipefail # Make sure to start with a clean 'manifests' dir rm -rf manifests -mkdir manifests +mkdir -p manifests/setup # optional, but we would like to generate yaml, not json jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {} diff --git a/docs/developing-prometheus-rules-and-grafana-dashboards.md b/docs/developing-prometheus-rules-and-grafana-dashboards.md index aa1283ecea847551d3aaaefc6ad22f73c603f7f4..974c3cc440a763b1fa9b552f0e355ac507fa15ff 100644 --- a/docs/developing-prometheus-rules-and-grafana-dashboards.md +++ b/docs/developing-prometheus-rules-and-grafana-dashboards.md @@ -24,8 +24,13 @@ local kp = }, }; -{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + -{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator)) +} + +// serviceMonitor is separated so that it can be created after the CRDs are ready +{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + @@ -305,3 +310,24 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } ``` + +Incase you have lots of json dashboard exported out from grafan UI the above approch is going to take lots of time. to improve performance we can use `rawGrafanaDashboards` field and provide it's value as json string by using importstr +[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example-2.jsonnet) +```jsonnet +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + rawGrafanaDashboards+:: { + 'my-dashboard.json': (importstr 'example-grafana-dashboard.json'), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +``` diff --git a/example.jsonnet b/example.jsonnet index 7002eff26b6619042086f891c02dd85e67e85ee3..77864e09a87d14f1dd7befbe65c7921eb03af326 100644 --- a/example.jsonnet +++ b/example.jsonnet @@ -12,8 +12,13 @@ local kp = }, }; -{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + -{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator)) +} + +// serviceMonitor is separated so that it can be created after the CRDs are ready +{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + diff --git a/examples/additional-namespaces-servicemonitor.jsonnet b/examples/additional-namespaces-servicemonitor.jsonnet index 0262b4e060cc093d2d26f55a249b40c2255f0432..0f3add96d462392f903fbf64276d3c42f822f459 100644 --- a/examples/additional-namespaces-servicemonitor.jsonnet +++ b/examples/additional-namespaces-servicemonitor.jsonnet @@ -1,33 +1,33 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { - namespace: 'monitoring', - prometheus+:: { - namespaces+: ['my-namespace', 'my-second-namespace'], - } - }, + namespace: 'monitoring', prometheus+:: { - serviceMonitorMyNamespace: { - apiVersion: 'monitoring.coreos.com/v1', - kind: 'ServiceMonitor', - metadata: { - name: 'my-servicemonitor', - namespace: 'my-namespace', + namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, + prometheus+:: { + serviceMonitorMyNamespace: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'my-servicemonitor', + namespace: 'my-namespace', + }, + spec: { + jobLabel: 'app', + endpoints: [ + { + port: 'http-metrics', }, - spec: { - jobLabel: 'app', - endpoints: [ - { - port: 'http-metrics', - }, - ], - selector: { - matchLabels: { - 'app': 'myapp', - }, - }, + ], + selector: { + matchLabels: { + app: 'myapp', }, }, - }, + }, + }, + }, }; diff --git a/examples/grafana-additional-rendered-dashboard-example-2.jsonnet b/examples/grafana-additional-rendered-dashboard-example-2.jsonnet new file mode 100644 index 0000000000000000000000000000000000000000..391b0f02e62611853010ea4265fcf6bc59fa7c61 --- /dev/null +++ b/examples/grafana-additional-rendered-dashboard-example-2.jsonnet @@ -0,0 +1,16 @@ +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + }, + rawGrafanaDashboards+:: { + 'my-dashboard.json': (importstr 'example-grafana-dashboard.json'), + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } diff --git a/examples/kustomize.jsonnet b/examples/kustomize.jsonnet index db3ba344278cbaac7e8cda89fc542eb2e3d736f3..38dd6c89d66559beff0c949545c2f1e5b40cccbf 100644 --- a/examples/kustomize.jsonnet +++ b/examples/kustomize.jsonnet @@ -8,8 +8,13 @@ local kp = local manifests = // Uncomment line below to enable vertical auto scaling of kube-state-metrics //{ ['ksm-autoscaler-' + name]: kp.ksmAutoscaler[name] for name in std.objectFields(kp.ksmAutoscaler) } + - { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + - { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + + { ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + + { + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator)) + } + + // serviceMonitor is separated so that it can be created after the CRDs are ready + { 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + diff --git a/go.mod b/go.mod index b67b2a09b515e91d1d10353c9773a5f2a3c8a861..2be821b6b9c1168649ed4ac38d7687ae148e2370 100644 --- a/go.mod +++ b/go.mod @@ -4,25 +4,30 @@ go 1.12 require ( github.com/Jeffail/gabs v1.2.0 - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect + github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect github.com/gogo/protobuf v1.1.1 // indirect github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf // indirect github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect github.com/imdario/mergo v0.3.7 // indirect github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be // indirect + github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 // indirect + github.com/mattn/go-colorable v0.1.4 // indirect + github.com/mattn/go-isatty v0.0.10 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.1 // indirect github.com/pkg/errors v0.8.1 - github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.3 // indirect - github.com/stretchr/testify v1.2.2 // indirect + github.com/stretchr/objx v0.2.0 // indirect golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a // indirect golang.org/x/net v0.0.0-20190206173232-65e2d4e15006 // indirect golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect + golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2 // indirect golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db // indirect golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.2.2 // indirect + gopkg.in/yaml.v2 v2.2.4 // indirect k8s.io/api v0.0.0-20190313235455-40a48860b5ab // indirect k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 k8s.io/client-go v11.0.0+incompatible diff --git a/go.sum b/go.sum index 94564ff1233d0258fdf2e39709dba243d38d4542..a53919d64d6224677aefdde86e1a3a89d7d583e9 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,18 @@ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/Jeffail/gabs v1.2.0 h1:uFhoIVTtsX7hV2RxNgWad8gMU+8OJdzFbOathJdhD3o= github.com/Jeffail/gabs v1.2.0/go.mod h1:6xMvQMK4k33lb7GUUpaAPh6nKMmemQeg5d4gn7/bOXc= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= @@ -15,18 +25,32 @@ github.com/imdario/mergo v0.3.7 h1:Y+UAYTZ7gDEuOfhxKWy+dvb5dRQ6rJjFSdX2HZY1/gI= github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be h1:AHimNtVIpiBjPUhEF5KNCkrUyqTSA5zWUl8sQ2bfGBE= github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 h1:T/HtHFr+mYCRULrH1x/RnoB0prIs0rMkolJhFMXNC9A= +github.com/jsonnet-bundler/jsonnet-bundler v0.1.0/go.mod h1:YKsSFc9VFhhLITkJS3X2PrRqWG9u2Jq99udTdDjQLfM= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= +github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10= +github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a h1:Igim7XhdOpBnWPuYJ70XcNpq8q3BCACtVgNfoJxOV7g= golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -37,8 +61,12 @@ golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0 golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e h1:nFYrTHrdrAOpShe27kaFHjsqYSEQ0KWqdWLu3xuZJts= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db h1:6/JqlYfC1CCaLnGceQTI+sDGhC9UBSPAsBqI0Gun6kU= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= @@ -47,12 +75,16 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc= k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA= k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg= diff --git a/jsonnet/kube-prometheus/alerts/general.libsonnet b/jsonnet/kube-prometheus/alerts/general.libsonnet index 79eba7d99c7b45a662a950ba2aa296db20669454..441ec777b48b6edc0cdd06aa73a0d07c9a7da695 100644 --- a/jsonnet/kube-prometheus/alerts/general.libsonnet +++ b/jsonnet/kube-prometheus/alerts/general.libsonnet @@ -7,7 +7,7 @@ { alert: 'TargetDown', annotations: { - message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets are down.', + message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets in {{ $labels.namespace }} namespace are down.', }, expr: '100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10', 'for': '10m', diff --git a/jsonnet/kube-prometheus/jsonnetfile.json b/jsonnet/kube-prometheus/jsonnetfile.json index 4817e20c049a16bd6924d1447e48aef87cfde35d..fa7545afb27ca60867b0eb981dbd2b8aaf034d1d 100644 --- a/jsonnet/kube-prometheus/jsonnetfile.json +++ b/jsonnet/kube-prometheus/jsonnetfile.json @@ -38,7 +38,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "release-0.33" + "version": "release-0.34" }, { "name": "etcd-mixin", diff --git a/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet b/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet index 1bd64e1b3b21ab84bd79b2ebb2a86d3e24d0ab07..ab5dceae5c3e9330ba8799a5fad242bfb532db8b 100644 --- a/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet +++ b/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet @@ -9,6 +9,12 @@ scheme: 'http', interval: '30s', bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + relabelings: [ + { + sourceLabels: ['__metrics_path__'], + targetLabel: 'metrics_path' + }, + ], }, { port: 'http-metrics', @@ -17,6 +23,21 @@ interval: '30s', honorLabels: true, bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + relabelings: [ + { + sourceLabels: ['__metrics_path__'], + targetLabel: 'metrics_path' + }, + ], + metricRelabelings: [ + // Drop a bunch of metrics which are disabled but still sent, see + // https://github.com/google/cadvisor/issues/1925. + { + sourceLabels: ['__name__'], + regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)', + action: 'drop', + }, + ], }, ], }, diff --git a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet index aeca71bcd0eccb8973dfb232fbe6286239122f8f..e30a63c3b77214fdf0a4de12a5ebc0be59adfaa0 100644 --- a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet +++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet @@ -14,7 +14,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; }, versions+:: { - kubeStateMetrics: 'v1.7.2', + kubeStateMetrics: 'v1.8.0', kubeRbacProxy: 'v0.4.1', }, diff --git a/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet index 9a3d29ebbd109067fae4345b1b55a2d112139a56..06a7397fce1bb75beb66e4f4805ade2ae50546f1 100644 --- a/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet +++ b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet @@ -118,8 +118,8 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; // forgo declaring the host port, however it is important to declare // it so that the scheduler can decide if the pod is schedulable. container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) + - container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) + - container.mixin.resources.withLimits({ cpu: '20m', memory: '60Mi' }) + + container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) + + container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) + container.withEnv([ip]); local c = [nodeExporter, proxy]; diff --git a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet index 42baf52a76d9650fa04dfed3e61d50ccc2ddffef..198260213156ec121cfc4574f94fde0a14a413a8 100644 --- a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet +++ b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet @@ -18,6 +18,8 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; }, prometheus+:: { + name: 'k8s', + replicas: 2, rules: {}, namespaces: ['default', 'kube-system', $._config.namespace], }, @@ -26,10 +28,10 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; prometheus+:: { local p = self, - name:: 'k8s', + name:: $._config.prometheus.name, namespace:: $._config.namespace, roleBindingNamespaces:: $._config.prometheus.namespaces, - replicas:: 2, + replicas:: $._config.prometheus.replicas, prometheusRules:: $._config.prometheus.rules, alertmanagerName:: $.alertmanager.service.metadata.name, @@ -281,6 +283,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; insecureSkipVerify: true, }, bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + relabelings: [ + { + sourceLabels: ['__metrics_path__'], + targetLabel: 'metrics_path' + }, + ], }, { port: 'https-metrics', @@ -292,6 +300,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; insecureSkipVerify: true, }, bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', + relabelings: [ + { + sourceLabels: ['__metrics_path__'], + targetLabel: 'metrics_path' + }, + ], metricRelabelings: [ // Drop a bunch of metrics which are disabled but still sent, see // https://github.com/google/cadvisor/issues/1925. diff --git a/jsonnetfile.json b/jsonnetfile.json index 3062e3bca82c37f9035fa47e592a9989b1ec022e..561224c271ff5b821e678541f1c3b0d9e9216580 100644 --- a/jsonnetfile.json +++ b/jsonnetfile.json @@ -1,13 +1,23 @@ { - "dependencies": [ - { - "name": "kube-prometheus", - "source": { - "local": { - "directory": "jsonnet/kube-prometheus" - } - }, - "version": "" + "dependencies": [ + { + "name": "kube-prometheus", + "source": { + "local": { + "directory": "jsonnet/kube-prometheus" } - ] + }, + "version": "" + }, + { + "name": "node-mixin", + "source": { + "git": { + "remote": "https://github.com/prometheus/node_exporter", + "subdir": "docs/node-mixin" + } + }, + "version": "5a7b85876d6108a91f0d8673c0d7eca38687671b" + } + ] } diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 0282336c7296e39b54196ea03ce4cb317cfdd807..daa3bb8f951cb77534cf53fad450ca093f2cd4d0 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -1,113 +1,123 @@ { - "dependencies": [ - { - "name": "kube-prometheus", - "source": { - "local": { - "directory": "jsonnet/kube-prometheus" - } - }, - "version": "" - }, - { - "name": "ksonnet", - "source": { - "git": { - "remote": "https://github.com/ksonnet/ksonnet-lib", - "subdir": "" - } - }, - "version": "0d2f82676817bbf9e4acf6495b2090205f323b9f" - }, - { - "name": "kubernetes-mixin", - "source": { - "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", - "subdir": "" - } - }, - "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f" - }, - { - "name": "grafonnet", - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib", - "subdir": "grafonnet" - } - }, - "version": "47db72da03fc4a7a0658a87791e13c3315a3a252" - }, - { - "name": "grafana-builder", - "source": { - "git": { - "remote": "https://github.com/kausalco/public", - "subdir": "grafana-builder" - } - }, - "version": "3fe9a46d5fe0b70cbcabec1d2054f8ac3b3faae7" - }, - { - "name": "grafana", - "source": { - "git": { - "remote": "https://github.com/brancz/kubernetes-grafana", - "subdir": "grafana" - } - }, - "version": "539a90dbf63c812ad0194d8078dd776868a11c81" - }, - { - "name": "prometheus-operator", - "source": { - "git": { - "remote": "https://github.com/coreos/prometheus-operator", - "subdir": "jsonnet/prometheus-operator" - } - }, - "version": "908ee0372a9ac2c6574d589fdc56a4f3cb5f12d1" - }, - { - "name": "etcd-mixin", - "source": { - "git": { - "remote": "https://github.com/coreos/etcd", - "subdir": "Documentation/etcd-mixin" - } - }, - "version": "92f313811e7a9e0dbe73f288d27408817cb7865d" - }, - { - "name": "prometheus", - "source": { - "git": { - "remote": "https://github.com/prometheus/prometheus", - "subdir": "documentation/prometheus-mixin" - } - }, - "version": "5f1be2cf455409d3577ac9ddb43144ec2d39c90b" - }, - { - "name": "node-mixin", - "source": { - "git": { - "remote": "https://github.com/prometheus/node_exporter", - "subdir": "docs/node-mixin" - } - }, - "version": "9f49fff79ef85fcebb69289622150e6d5346528b" - }, - { - "name": "promgrafonnet", - "source": { - "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", - "subdir": "lib/promgrafonnet" - } - }, - "version": "3ad401ea3ef7fb0879298fa411772984ffa7f31f" + "dependencies": [ + { + "name": "etcd-mixin", + "source": { + "git": { + "remote": "https://github.com/coreos/etcd", + "subdir": "Documentation/etcd-mixin" } - ] + }, + "version": "fa972cf29666e821c44195c51df15b6e28ed29c4", + "sum": "bkp18AxkOUYnVC15Gh9EoIi+mMAn0IT3hMzb8mlzpSw=" + }, + { + "name": "grafana", + "source": { + "git": { + "remote": "https://github.com/brancz/kubernetes-grafana", + "subdir": "grafana" + } + }, + "version": "539a90dbf63c812ad0194d8078dd776868a11c81", + "sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE=" + }, + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "1f273dd3c7a619bcd05c3e1c2650204104a273d8", + "sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE=" + }, + { + "name": "grafonnet", + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib", + "subdir": "grafonnet" + } + }, + "version": "47db72da03fc4a7a0658a87791e13c3315a3a252", + "sum": "ssLOSIiYWYBvEYJHwRtwMY4kPQoP+MuIUkT0bp2Mb6A=" + }, + { + "name": "ksonnet", + "source": { + "git": { + "remote": "https://github.com/ksonnet/ksonnet-lib", + "subdir": "" + } + }, + "version": "0d2f82676817bbf9e4acf6495b2090205f323b9f", + "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=" + }, + { + "name": "kube-prometheus", + "source": { + "local": { + "directory": "jsonnet/kube-prometheus" + } + }, + "version": "" + }, + { + "name": "kubernetes-mixin", + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "subdir": "" + } + }, + "version": "325f8a46fac9605f1de8bc20ca811cb92d1ef7e5", + "sum": "qfm0EpLrEZ1+fe93LFLa9tyOalK6JehpholxO2d0xXU=" + }, + { + "name": "node-mixin", + "source": { + "git": { + "remote": "https://github.com/prometheus/node_exporter", + "subdir": "docs/node-mixin" + } + }, + "version": "5a7b85876d6108a91f0d8673c0d7eca38687671b", + "sum": "3N77msMjqClzQHbZOxn4GTlV+FZpU+y1gCekvCvxwy0=" + }, + { + "name": "prometheus", + "source": { + "git": { + "remote": "https://github.com/prometheus/prometheus", + "subdir": "documentation/prometheus-mixin" + } + }, + "version": "74726367cf7a7e8d0332238defd2e7f4169030bd", + "sum": "wSDLAXS5Xzla9RFRE2IW5mRToeRFULHb7dSYYBDfEsM=" + }, + { + "name": "prometheus-operator", + "source": { + "git": { + "remote": "https://github.com/coreos/prometheus-operator", + "subdir": "jsonnet/prometheus-operator" + } + }, + "version": "8d44e0990230144177f97cf62ae4f43b1c4e3168", + "sum": "5U7/8MD3pF9O0YDTtUhg4vctkUBRVFxZxWUyhtNiBM8=" + }, + { + "name": "promgrafonnet", + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "subdir": "lib/promgrafonnet" + } + }, + "version": "325f8a46fac9605f1de8bc20ca811cb92d1ef7e5", + "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" + } + ] } diff --git a/kustomization.yaml b/kustomization.yaml index a580ed8e6602627dd362095bc07356b0dc55d801..bd03a83e0008b4dc8340f71f5c1963a05b381f17 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -1,18 +1,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- ./manifests/00namespace-namespace.yaml -- ./manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml -- ./manifests/0prometheus-operator-0podmonitorCustomResourceDefinition.yaml -- ./manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml -- ./manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml -- ./manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml -- ./manifests/0prometheus-operator-clusterRole.yaml -- ./manifests/0prometheus-operator-clusterRoleBinding.yaml -- ./manifests/0prometheus-operator-deployment.yaml -- ./manifests/0prometheus-operator-service.yaml -- ./manifests/0prometheus-operator-serviceAccount.yaml -- ./manifests/0prometheus-operator-serviceMonitor.yaml - ./manifests/alertmanager-alertmanager.yaml - ./manifests/alertmanager-secret.yaml - ./manifests/alertmanager-service.yaml @@ -52,6 +40,7 @@ resources: - ./manifests/prometheus-adapter-serviceAccount.yaml - ./manifests/prometheus-clusterRole.yaml - ./manifests/prometheus-clusterRoleBinding.yaml +- ./manifests/prometheus-operator-serviceMonitor.yaml - ./manifests/prometheus-prometheus.yaml - ./manifests/prometheus-roleBindingConfig.yaml - ./manifests/prometheus-roleBindingSpecificNamespaces.yaml @@ -66,3 +55,14 @@ resources: - ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml - ./manifests/prometheus-serviceMonitorKubeScheduler.yaml - ./manifests/prometheus-serviceMonitorKubelet.yaml +- ./manifests/setup/0namespace-namespace.yaml +- ./manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml +- ./manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml +- ./manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml +- ./manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml +- ./manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml +- ./manifests/setup/prometheus-operator-clusterRole.yaml +- ./manifests/setup/prometheus-operator-clusterRoleBinding.yaml +- ./manifests/setup/prometheus-operator-deployment.yaml +- ./manifests/setup/prometheus-operator-service.yaml +- ./manifests/setup/prometheus-operator-serviceAccount.yaml diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 75a8b63b1b4248f6415c8478b1b411791bd8cf90..9fb51bed91414d177f42aeba04d3849bb4847de7 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -1238,7 +1238,7 @@ items: "query": "label_values(apiserver_request_total{job=\"apiserver\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -1289,7 +1289,7 @@ items: namespace: monitoring - apiVersion: v1 data: - controller-manager.json: |- + cluster-total.json: |- { "__inputs": [ @@ -1299,10 +1299,18 @@ items: ], "annotations": { "list": [ - + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, @@ -1310,199 +1318,782 @@ items: "links": [ ], - "refresh": "", - "rows": [ + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" + ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ - }, - "id": 2, - "interval": null, - "links": [ + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(up{job=\"kube-controller-manager\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + ], - "valueName": "min" + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" }, { - "aliasColors": { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - }, - "id": 3, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ ], - "spaceLength": 10, - "span": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Work Queue Add Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "title": "Current Status", + "type": "table" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 7, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 8, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth History", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -1511,29 +2102,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 31 }, - "id": 4, + "id": 13, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1543,16 +2141,17 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1560,10 +2159,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Depth", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1578,7 +2177,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1586,7 +2185,7 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1594,20 +2193,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -1615,29 +2201,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 40 }, - "id": 5, + "id": 14, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1647,16 +2240,17 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1664,10 +2258,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Latency", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1682,19 +2276,19 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -1703,14 +2297,21 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 15, "panels": [ { "aliasColors": { @@ -1719,29 +2320,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 50 }, - "id": 6, + "id": 16, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1751,37 +2359,17 @@ items: ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1789,10 +2377,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Kube API Request Rate", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1807,19 +2395,19 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -1831,29 +2419,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 59 }, - "id": 7, + "id": 17, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1863,16 +2458,17 @@ items: ], "spaceLength": 10, - "span": 8, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1880,10 +2476,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Post Request Latency 99th Quantile", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1898,7 +2494,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1906,7 +2502,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1919,28 +2515,269 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" - }, + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Cluster", + "uid": "ff635a025bcfea7bc3dd4f508990a3e9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-cluster-total + namespace: monitoring +- apiVersion: v1 + data: + controller-manager.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { "collapse": false, "collapsed": false, "panels": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 8, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kube-controller-manager\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, "legend": { "alignAsTable": "true", "avg": false, @@ -1967,15 +2804,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -1984,7 +2821,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Get Request Latency 99th Quantile", + "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, @@ -2002,19 +2839,19 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -2044,17 +2881,17 @@ items: "gridPos": { }, - "id": 9, + "id": 4, "legend": { - "alignAsTable": false, + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, @@ -2071,15 +2908,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -2088,7 +2925,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, @@ -2106,23 +2943,36 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2135,17 +2985,17 @@ items: "gridPos": { }, - "id": 10, + "id": 5, "legend": { - "alignAsTable": false, + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, @@ -2162,15 +3012,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -2179,7 +3029,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, @@ -2197,23 +3047,36 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2226,7 +3089,7 @@ items: "gridPos": { }, - "id": 11, + "id": 6, "legend": { "alignAsTable": false, "avg": false, @@ -2258,11 +3121,32 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "2xx", "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -2270,7 +3154,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Goroutines", + "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, @@ -2288,7 +3172,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -2296,7 +3180,7 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -2304,144 +3188,27 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Controller Manager", - "uid": "72e0e05bef5099e5f049b05fdc429ed4", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-controller-manager - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-cluster.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, + "id": 7, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2451,37 +3218,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 8, "stack": false, "steppedLine": false, "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2493,7 +3263,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -2501,15 +3271,28 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2519,53 +3302,60 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 2, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Requests Commitment", + "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2577,7 +3367,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -2585,15 +3375,28 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2603,13 +3406,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 3, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2619,37 +3426,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Limits Commitment", + "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2661,20 +3471,20 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -2687,13 +3497,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 4, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2703,37 +3517,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2757,8 +3574,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -2771,13 +3588,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 5, - "legend": { - "avg": false, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2787,37 +3608,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Requests Commitment", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2833,7 +3657,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -2842,10 +3666,131 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Controller Manager", + "uid": "72e0e05bef5099e5f049b05fdc429ed4", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-controller-manager + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ { "aliasColors": { @@ -2856,7 +3801,7 @@ items: "datasource": "$datasource", "fill": 1, "format": "percentunit", - "id": 6, + "id": 1, "legend": { "avg": false, "current": false, @@ -2885,7 +3830,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -2895,7 +3840,7 @@ items: "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Memory Limits Commitment", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -2929,19 +3874,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2950,8 +3883,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "format": "percentunit", + "id": 2, "legend": { "avg": false, "current": false, @@ -2962,7 +3896,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2975,31 +3909,28 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Requests Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -3027,19 +3958,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -3049,7 +3968,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "format": "percentunit", + "id": 3, "legend": { "avg": false, "current": false, @@ -3073,254 +3993,280 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 2, "stack": false, "steppedLine": false, - "styles": [ + "targets": [ { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #A", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workloads", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to workloads", - "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #B", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "short" - }, + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "short" - }, + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "percentunit" - }, + ] + }, + "yaxes": [ { - "alias": "Namespace", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", - "pattern": "namespace", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "type": "string", - "unit": "short" - } ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", "instant": true, "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Limits Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -3353,8 +4299,8 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", + "showTitle": false, + "title": "Headlines", "titleSize": "h6" }, { @@ -3370,7 +4316,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 7, "legend": { "avg": false, "current": false, @@ -3399,7 +4345,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -3412,7 +4358,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -3430,7 +4376,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -3452,7 +4398,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory", + "title": "CPU", "titleSize": "h6" }, { @@ -3468,7 +4414,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 10, + "id": 8, "legend": { "avg": false, "current": false, @@ -3539,7 +4485,7 @@ items: "unit": "short" }, { - "alias": "Memory Usage", + "alias": "CPU Usage", "colorMode": null, "colors": [ @@ -3554,10 +4500,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -3572,10 +4518,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -3593,7 +4539,7 @@ items: "unit": "percentunit" }, { - "alias": "Memory Limits", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -3608,10 +4554,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -3682,7 +4628,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3691,7 +4637,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3700,7 +4646,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3709,7 +4655,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3718,7 +4664,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3732,7 +4678,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Requests by Namespace", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -3773,144 +4719,35 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Requests", + "title": "CPU Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-cluster - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], "nullPointMode": "null as zero", @@ -3927,10 +4764,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -3940,7 +4777,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -3958,7 +4795,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -3980,7 +4817,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Memory", "titleSize": "h6" }, { @@ -3996,7 +4833,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 10, "legend": { "avg": false, "current": false, @@ -4031,7 +4868,43 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -4041,15 +4914,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #A", + "pattern": "Value #C", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -4059,15 +4932,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #D", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests %", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -4077,7 +4950,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #E", "thresholds": [ ], @@ -4085,7 +4958,7 @@ items: "unit": "percentunit" }, { - "alias": "CPU Limits", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -4095,15 +4968,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #F", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -4113,7 +4986,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #G", "thresholds": [ ], @@ -4121,7 +4994,7 @@ items: "unit": "percentunit" }, { - "alias": "Pod", + "alias": "Namespace", "colorMode": null, "colors": [ @@ -4129,9 +5002,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -4156,7 +5029,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4165,7 +5038,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4174,7 +5047,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4183,7 +5056,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4192,13 +5065,31 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 } ], "thresholds": [ @@ -4206,7 +5097,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Requests by Namespace", "tooltip": { "shared": false, "sort": 0, @@ -4247,7 +5138,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Requests", "titleSize": "h6" }, { @@ -4262,8 +5153,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "id": 11, "legend": { "avg": false, "current": false, @@ -4274,7 +5165,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -4288,117 +5179,19 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage (w/o cache)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", @@ -4411,10 +5204,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Requests", + "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ @@ -4429,10 +5222,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Requests %", + "alias": "Rate of Received Packets", "colorMode": null, "colors": [ @@ -4447,10 +5240,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "Memory Limits", + "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ @@ -4465,10 +5258,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "pps" }, { - "alias": "Memory Limits %", + "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ @@ -4483,10 +5276,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "Memory Usage (RSS)", + "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ @@ -4501,46 +5294,10 @@ items: ], "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "unit": "pps" }, { - "alias": "Pod", + "alias": "Namespace", "colorMode": null, "colors": [ @@ -4548,9 +5305,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -4575,7 +5332,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4584,7 +5341,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4593,7 +5350,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4602,7 +5359,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4611,7 +5368,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4620,31 +5377,13 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -4652,7 +5391,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, @@ -4693,145 +5432,107 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "85a562078cdf77779eaa1add43ccec1e", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-namespace - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-node.json: |- - { - "annotations": { - "list": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "refresh": "10s", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -4845,7 +5546,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 13, "legend": { "avg": false, "current": false, @@ -4874,10 +5575,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -4887,7 +5588,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -4905,7 +5606,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -4927,7 +5628,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Network", "titleSize": "h6" }, { @@ -4942,8 +5643,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "fill": 10, + "id": 14, "legend": { "avg": false, "current": false, @@ -4954,7 +5655,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -4968,183 +5669,211 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ + "expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "short" - }, + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ + "expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "percentunit" - }, + ] + }, + "yaxes": [ { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{namespace}}", + "legendLink": null, "step": 10 } ], @@ -5153,14 +5882,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -5172,7 +5900,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5194,7 +5922,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Network", "titleSize": "h6" }, { @@ -5210,7 +5938,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 17, "legend": { "avg": false, "current": false, @@ -5239,10 +5967,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -5252,7 +5980,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -5270,7 +5998,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5292,7 +6020,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Network", "titleSize": "h6" }, { @@ -5307,8 +6035,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 18, "legend": { "avg": false, "current": false, @@ -5319,7 +6047,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -5333,264 +6061,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ + "targets": [ { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "H", + "legendFormat": "{{namespace}}", + "legendLink": null, "step": 10 } ], @@ -5599,14 +6078,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -5618,7 +6096,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5640,7 +6118,105 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", "titleSize": "h6" } ], @@ -5682,7 +6258,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, cluster)", + "query": "label_values(node_cpu_seconds_total, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -5696,29 +6272,37 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "prod", - "value": "prod" + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 0, + "datasource": "prometheus", + "hide": 2, "includeAll": false, - "label": "node", + "label": null, "multi": false, - "name": "node", + "name": "interval", "options": [ - + { + "selected": true, + "text": "4h", + "value": "4h" + } ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", - "refresh": 1, + "query": "4h", + "refresh": 2, "regex": "", - "sort": 2, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] @@ -5753,17 +6337,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Compute Resources / Node (Pods)", - "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-node + name: grafana-dashboard-k8s-resources-cluster namespace: monitoring - apiVersion: v1 data: - k8s-resources-pod.json: |- + k8s-resources-namespace.json: |- { "annotations": { "list": [ @@ -5821,10 +6405,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -6015,17 +6599,17 @@ items: "unit": "percentunit" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, + "link": true, "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -6050,7 +6634,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6059,7 +6643,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6068,7 +6652,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6077,7 +6661,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6086,7 +6670,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6186,26 +6770,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}} (RSS)", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}} (Cache)", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}} (Swap)", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -6215,7 +6783,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -6450,17 +7018,17 @@ items: "unit": "bytes" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, + "link": true, "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -6485,7 +7053,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6494,7 +7062,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6503,7 +7071,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6512,7 +7080,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6521,7 +7089,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6530,7 +7098,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6539,7 +7107,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6548,7 +7116,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6605,294 +7173,33 @@ items: "showTitle": true, "title": "Memory Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Pod", - "uid": "6581e46e4e5c7ba40a07646395ef7b23", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-pod - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workload.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], "nullPointMode": "null as zero", @@ -6915,7 +7222,7 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ @@ -6930,10 +7237,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "Bps" }, { - "alias": "CPU Requests", + "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ @@ -6948,10 +7255,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "Bps" }, { - "alias": "CPU Requests %", + "alias": "Rate of Received Packets", "colorMode": null, "colors": [ @@ -6966,10 +7273,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "CPU Limits", + "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ @@ -6984,10 +7291,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "pps" }, { - "alias": "CPU Limits %", + "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ @@ -7002,7 +7309,25 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" }, { "alias": "Pod", @@ -7013,7 +7338,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", + "linkTooltip": "Drill down to pods", "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -7040,7 +7365,7 @@ items: ], "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7049,7 +7374,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7058,7 +7383,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7067,7 +7392,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7076,13 +7401,22 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], "thresholds": [ @@ -7090,7 +7424,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, @@ -7131,7 +7465,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Network", "titleSize": "h6" }, { @@ -7147,7 +7481,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 6, "legend": { "avg": false, "current": false, @@ -7176,7 +7510,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -7189,7 +7523,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -7207,7 +7541,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7229,7 +7563,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Network", "titleSize": "h6" }, { @@ -7244,8 +7578,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 7, "legend": { "avg": false, "current": false, @@ -7256,7 +7590,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7270,183 +7604,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -7455,14 +7621,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7474,7 +7639,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7496,202 +7661,7811 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "workload", - "multi": false, - "name": "workload", - "options": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "type", - "multi": false, - "name": "type", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-namespace + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-node.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "node", + "multi": false, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-node + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (RSS)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Cache)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Swap)", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-pod + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "workload", + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "type", + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workload + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", "2d", "7d", "30d" ] }, "timezone": "", - "title": "Kubernetes / Compute Resources / Workload", - "uid": "a164a7f0339f99e89cea5cb47e9be617", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-workload + name: grafana-dashboard-k8s-resources-workloads-namespace namespace: monitoring - apiVersion: v1 data: - k8s-resources-workloads-namespace.json: |- + kubelet.json: |- { + "__inputs": [ + + ], + "__requires": [ + + ], "annotations": { "list": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Pods", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Container", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ - ], - "refresh": "10s", - "rows": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Config Error Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -7701,42 +15475,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 1, + "fill": 1, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" } ], "thresholds": [ @@ -7744,7 +15523,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Operation Rate", "tooltip": { "shared": false, "sort": 0, @@ -7762,35 +15541,23 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -7800,254 +15567,150 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, + "targets": [ { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "type": "number", - "unit": "short" + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Workload Type", - "colorMode": null, - "colors": [ + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "id": 10, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" } ], "thresholds": [ @@ -8055,14 +15718,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Operation duration 99th quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -8074,20 +15736,20 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -8095,13 +15757,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -8111,42 +15774,152 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" } ], "thresholds": [ @@ -8154,7 +15927,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Pod Start Duration", "tooltip": { "shared": false, "sort": 0, @@ -8172,7 +15945,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8180,12 +15953,12 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -8193,13 +15966,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Memory Usage", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -8210,254 +15984,48 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "gridPos": { + + }, + "id": 13, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" } ], "thresholds": [ @@ -8465,14 +16033,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Storage Operation Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -8484,7 +16051,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -8492,673 +16059,604 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Quota", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "a87fb0d919ec0ea5f6543124e16c42a5", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-workloads-namespace - namespace: monitoring -- apiVersion: v1 - data: - kubelet.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 2, - "interval": null, + "id": 14, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})", + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], - "thresholds": "", - "title": "Up", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Error Rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 3, - "interval": null, + "id": 15, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": true, + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], - "thresholds": "", - "title": "Running Pods", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Duration 99th quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 4, - "interval": null, + "id": 16, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{operation_type}}", "refId": "A" } ], - "thresholds": "", - "title": "Running Container", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager operation rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 5, - "interval": null, + "id": 17, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], - "thresholds": "", - "title": "Actual Volume Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager 99th quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", + "fill": 1, "gridPos": { }, - "id": 6, - "interval": null, + "id": 18, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Desired Volume Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 7, - "interval": null, + "id": 19, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Config Error Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist interval", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" + ] } ], "repeat": null, @@ -9185,7 +16683,7 @@ items: "gridPos": { }, - "id": 8, + "id": 20, "legend": { "alignAsTable": "true", "avg": false, @@ -9212,15 +16710,15 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", + "legendFormat": "{{instance}}", "refId": "A" } ], @@ -9229,7 +16727,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation Rate", + "title": "PLEG relist duration", "tooltip": { "shared": false, "sort": 0, @@ -9247,23 +16745,36 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -9276,17 +16787,17 @@ items: "gridPos": { }, - "id": 9, + "id": 21, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, "linewidth": 1, @@ -9303,16 +16814,37 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", + "legendFormat": "2xx", "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -9320,7 +16852,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation Error Rate", + "title": "RPC Rate", "tooltip": { "shared": false, "sort": 0, @@ -9380,7 +16912,7 @@ items: "gridPos": { }, - "id": 10, + "id": 22, "legend": { "alignAsTable": "true", "avg": false, @@ -9412,10 +16944,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", + "legendFormat": "{{instance}} {{verb}} {{url}}", "refId": "A" } ], @@ -9424,7 +16956,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation duration 99th quantile", + "title": "Request duration 99th quantile", "tooltip": { "shared": false, "sort": 0, @@ -9446,7 +16978,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -9454,7 +16986,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -9484,17 +17016,17 @@ items: "gridPos": { }, - "id": 11, + "id": 23, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, "linewidth": 1, @@ -9511,23 +17043,107 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} pod", + "legendFormat": "{{instance}}", "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 24, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -9535,7 +17151,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Rate", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, @@ -9553,7 +17169,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9561,7 +17177,7 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9582,17 +17198,17 @@ items: "gridPos": { }, - "id": 12, + "id": 25, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, "linewidth": 1, @@ -9609,23 +17225,16 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} pod", + "legendFormat": "{{instance}}", "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" } ], "thresholds": [ @@ -9633,7 +17242,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Duration", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, @@ -9651,340 +17260,928 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } - ] + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-kubelet + namespace: monitoring +- apiVersion: v1 + data: + namespace-by-pod.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, { - "collapse": false, - "collapsed": false, - "panels": [ + "columns": [ { - "aliasColors": { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + "fill": 1, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ - }, - "id": 13, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Bandwidth Received", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Storage Operation Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "type": "number", + "unit": "Bps" }, { - "aliasColors": { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - }, - "id": 14, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Storage Operation Error Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } + ], + "title": "Current Status", + "type": "table" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, + "panels": [ + ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - }, - "id": 15, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": true, - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - ], - "timeFrom": null, - "timeShift": null, - "title": "Storage Operation Duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, "panels": [ { "aliasColors": { @@ -9993,29 +18190,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 30 }, - "id": 16, + "id": 10, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10025,16 +18229,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{operation_type}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10042,10 +18247,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager operation rate", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10060,7 +18265,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10068,7 +18273,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10084,29 +18289,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 30 }, - "id": 17, + "id": 11, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10116,16 +18328,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10133,10 +18346,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager 99th quantile", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10151,7 +18364,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10159,7 +18372,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10172,14 +18385,21 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -10188,30 +18408,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "description": "Pod lifecycle event generator", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 40 }, - "id": 18, + "id": 13, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10221,16 +18447,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10238,10 +18465,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist rate", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10256,7 +18483,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10264,7 +18491,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10280,29 +18507,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 40 }, - "id": 19, + "id": 14, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10312,16 +18546,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10329,10 +18564,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist interval", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10347,7 +18582,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10355,7 +18590,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10368,243 +18603,982 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" - }, + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-namespace-by-pod + namespace: monitoring +- apiVersion: v1 + data: + namespace-by-workload.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, { - "aliasColors": { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - }, - "id": 20, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "PLEG relist duration", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "title": "Current Status", + "type": "table" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, "panels": [ { "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 20 }, - "id": 21, + "id": 7, + "interval": null, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, + "percentage": true, + "percentageDecimals": null, "show": true, - "total": false, - "values": false + "values": true }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" } ], - "thresholds": [ + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { - ], - "timeFrom": null, - "timeShift": null, - "title": "RPC Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, "show": true, - "values": [ - - ] + "values": true }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A" } - ] + ], + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -10613,29 +19587,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 40 }, - "id": 22, + "id": 13, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10646,15 +19627,16 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{verb}} {{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10662,10 +19644,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Request duration 99th quantile", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10680,7 +19662,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10688,7 +19670,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10696,20 +19678,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -10717,16 +19686,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 40 }, - "id": 23, + "id": 14, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -10735,11 +19709,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10749,16 +19725,17 @@ items: ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10766,10 +19743,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10784,23 +19761,43 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 15, + "panels": [ { "aliasColors": { @@ -10808,16 +19805,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 41 }, - "id": 24, + "id": 16, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -10826,11 +19828,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10840,16 +19844,17 @@ items: ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10857,10 +19862,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10875,7 +19880,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10883,7 +19888,7 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10899,16 +19904,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 41 }, - "id": 25, + "id": 17, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -10917,11 +19927,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10931,16 +19943,17 @@ items: ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10948,10 +19961,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Goroutines", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10966,19 +19979,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -10987,13 +20000,17 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" } ], - "schemaVersion": 14, + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" @@ -11001,71 +20018,147 @@ items: "templating": { "list": [ { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "kube-system", + "value": "kube-system" }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, + "includeAll": false, "label": null, - "name": "datasource", + "multi": false, + "name": "namespace", "options": [ ], - "query": "prometheus", + "query": "label_values(container_network_receive_packets_total, namespace)", "refresh": 1, "regex": "", - "type": "datasource" + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 2, + "datasource": "prometheus", + "hide": 0, "includeAll": false, - "label": "cluster", + "label": null, "multi": false, - "name": "cluster", + "name": "resolution", "options": [ - + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } ], - "query": "label_values(kube_pod_info, cluster)", + "query": "30s,5m,1h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, "label": null, "multi": false, - "name": "instance", + "name": "interval", "options": [ - + { + "selected": true, + "text": "4h", + "value": "4h" + } ], - "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)", + "query": "4h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] @@ -11100,13 +20193,13 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Kubelet", - "uid": "3138fa155d5915769fbded898ac09fd9", + "title": "Kubernetes / Networking / Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-kubelet + name: grafana-dashboard-namespace-by-workload namespace: monitoring - apiVersion: v1 data: @@ -11438,7 +20531,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\"}", + "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -11451,7 +20544,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Swapped Pages)", + "title": "Memory Saturation (Major Page Faults)", "tooltip": { "shared": false, "sort": 0, @@ -12386,10 +21479,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Swap IO", + "legendFormat": "Major page faults", "legendLink": null, "step": 10 } @@ -12399,7 +21492,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (pages swapped per second)", + "title": "Memory Saturation (Major Page Faults)", "tooltip": { "shared": false, "sort": 0, @@ -13470,8 +22563,241 @@ items: "text": "N/A", "value": "null" } - ], - "valueName": "current" + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}} io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "used", + "color": "#E0B400" + }, + { + "alias": "available", + "color": "#73BF69" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "refId": "A" + }, + { + "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "available", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], "repeat": null, @@ -13498,7 +22824,7 @@ items: "gridPos": { }, - "id": 6, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -13522,14 +22848,7 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - { - "alias": "/ read| written/", - "yaxis": 1 - }, - { - "alias": "/ io time/", - "yaxis": 2 - } + ], "spaceLength": 10, "span": 6, @@ -13537,28 +22856,104 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{device}} read", + "legendFormat": "{{device}}", "refId": "A" - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} written", - "refId": "B" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{device}} io time", - "refId": "C" + "legendFormat": "{{device}}", + "refId": "A" } ], "thresholds": [ @@ -13566,7 +22961,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk I/O", + "title": "Network Transmitted", "tooltip": { "shared": false, "sort": 0, @@ -13580,27 +22975,155 @@ items: "show": true, "values": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Nodes", + "uid": "fa49a4706d07a042595b664c87fb33ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -13613,17 +23136,17 @@ items: "gridPos": { }, - "id": 7, + "id": 2, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -13637,32 +23160,25 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - { - "alias": "used", - "color": "#E0B400" - }, - { - "alias": "available", - "color": "#73BF69" - } + ], "spaceLength": 10, - "span": 6, + "span": 9, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "used", + "intervalFactor": 1, + "legendFormat": "Used Space", "refId": "A" }, { - "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "available", + "intervalFactor": 1, + "legendFormat": "Free Space", "refId": "B" } ], @@ -13671,7 +23187,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Space Usage", + "title": "Volume Space Usage", "tooltip": { "shared": false, "sort": 0, @@ -13705,6 +23221,90 @@ items: "show": true } ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" } ], "repeat": null, @@ -13727,21 +23327,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, + "fill": 1, "gridPos": { }, - "id": 8, + "id": 4, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -13758,17 +23358,23 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 9, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", + "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}}", + "intervalFactor": 1, + "legendFormat": "Used inodes", "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" } ], "thresholds": [ @@ -13776,7 +23382,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Received", + "title": "Volume inodes Usage", "tooltip": { "shared": false, "sort": 0, @@ -13794,7 +23400,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13802,7 +23408,7 @@ items: "show": true }, { - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13812,96 +23418,88 @@ items: ] }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], "datasource": "$datasource", - "fill": 0, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, + "id": 5, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", "format": "time_series", - "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{device}}", + "legendFormat": "", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Transmitted", + "thresholds": "80, 90", + "title": "Volume inodes Usage", "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "shared": false }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } - ] + ], + "valueName": "current" } ], "repeat": null, @@ -13916,7 +23514,7 @@ items: "schemaVersion": 14, "style": "dark", "tags": [ - + "kubernetes-mixin" ], "templating": { "list": [ @@ -13940,20 +23538,72 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": null, + "label": "Namespace", "multi": false, - "name": "instance", + "name": "namespace", "options": [ ], - "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -13965,7 +23615,7 @@ items: ] }, "time": { - "from": "now-1h", + "from": "now-7d", "to": "now" }, "timepicker": { @@ -13994,42 +23644,550 @@ items: ] }, "timezone": "", - "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-nodes + name: grafana-dashboard-persistentvolumesusage namespace: monitoring - apiVersion: v1 data: - persistentvolumesusage.json: |- + pod-total.json: |- { "__inputs": [ - ], - "__requires": [ + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { - ], - "annotations": { - "list": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "refresh": "", - "rows": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, { - "collapse": false, - "collapsed": false, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, "panels": [ { "aliasColors": { @@ -14038,29 +24196,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 21 }, - "id": 2, + "id": 9, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -14070,23 +24235,17 @@ items: ], "spaceLength": 10, - "span": 9, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" - }, - { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Free Space", - "refId": "B" + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -14094,10 +24253,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume Space Usage", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -14112,7 +24271,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14120,7 +24279,7 @@ items: "show": true }, { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14130,101 +24289,123 @@ items: ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 21 }, - "id": 3, - "interval": null, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "80, 90", - "title": "Volume Space Usage", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, "panels": [ { "aliasColors": { @@ -14232,30 +24413,37 @@ items: }, "bars": false, "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 32 }, - "id": 4, + "id": 12, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -14265,23 +24453,17 @@ items: ], "spaceLength": 10, - "span": 9, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": " Free inodes", - "refId": "B" + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -14289,10 +24471,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume inodes Usage", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -14307,7 +24489,7 @@ items: }, "yaxes": [ { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14315,7 +24497,7 @@ items: "show": true }, { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14325,100 +24507,119 @@ items: ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 32 }, - "id": 5, - "interval": null, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "80, 90", - "title": "Volume inodes Usage", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" } ], - "schemaVersion": 14, + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" @@ -14426,39 +24627,61 @@ items: "templating": { "list": [ { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "kube-system", + "value": "kube-system" }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, + "includeAll": true, "label": null, - "name": "datasource", + "multi": false, + "name": "namespace", "options": [ ], - "query": "prometheus", + "query": "label_values(container_network_receive_packets_total, namespace)", "refresh": 1, "regex": "", - "type": "datasource" + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { - "allValue": null, + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "", + "value": "" }, - "datasource": "$datasource", - "hide": 2, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "hide": 0, "includeAll": false, - "label": "cluster", + "label": null, "multi": false, - "name": "cluster", + "name": "pod", "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", - "refresh": 2, + "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "refresh": 1, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -14469,60 +24692,88 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", + "datasource": "prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": null, "multi": false, - "name": "namespace", + "name": "resolution", "options": [ - + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "query": "30s,5m,1h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 0, + "datasource": "prometheus", + "hide": 2, "includeAll": false, - "label": "PersistentVolumeClaim", + "label": null, "multi": false, - "name": "volume", + "name": "interval", "options": [ - + { + "selected": true, + "text": "4h", + "value": "4h" + } ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "query": "4h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] }, "time": { - "from": "now-7d", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -14551,13 +24802,13 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Persistent Volumes", - "uid": "919b92a8e8041bd567af9edab12c840c", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-pod-total namespace: monitoring - apiVersion: v1 data: @@ -15096,7 +25347,7 @@ items: "query": "label_values(kube_pod_info, cluster)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15122,7 +25373,7 @@ items: "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15148,7 +25399,7 @@ items: "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15174,7 +25425,7 @@ items: "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15873,7 +26124,7 @@ items: }, { "collapse": false, - "collapsed": false, + "collapsed": false,∠"panels": [ { "aliasColors": { @@ -15911,10 +26162,13 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - + { + "alias": "/max_shards/", + "yaxis": 2 + } ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ @@ -15965,7 +26219,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shards", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -16109,12 +26376,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -16126,7 +26393,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Dropped Samples", + "title": "Pending Samples: $queue", "tooltip": { "shared": true, "sort": 0, @@ -16160,7 +26427,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shard Details", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -16200,12 +26480,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -16217,7 +26497,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Failed Samples", + "title": "TSDB Current Segment", "tooltip": { "shared": true, "sort": 0, @@ -16235,7 +26515,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -16291,12 +26571,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -18959,7 +29239,7 @@ items: "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -20001,7 +30281,7 @@ items: "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -20100,7 +30380,270 @@ items: "gridPos": { }, - "id": 2, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "CPU", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Memory", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "Bps", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, "interval": null, "links": [ @@ -20119,7 +30662,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "cores", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -20130,16 +30673,17 @@ items: "to": "null" } ], - "span": 4, + "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20147,7 +30691,7 @@ items: } ], "thresholds": "", - "title": "CPU", + "title": "Desired Replicas", "tooltip": { "shared": false }, @@ -20183,7 +30727,7 @@ items: "gridPos": { }, - "id": 3, + "id": 6, "interval": null, "links": [ @@ -20202,7 +30746,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "GB", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -20213,16 +30757,17 @@ items: "to": "null" } ], - "span": 4, + "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20230,7 +30775,7 @@ items: } ], "thresholds": "", - "title": "Memory", + "title": "Replicas of current version", "tooltip": { "shared": false }, @@ -20266,430 +30811,1135 @@ items: "gridPos": { }, - "id": 4, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Observed Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 8, "interval": null, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Metadata Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas specified", + "refId": "A" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "Bps", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas created", + "refId": "B" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ + "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "ready", + "refId": "C" + }, { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "replicas of current version", + "refId": "D" + }, + { + "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "E" } ], - "thresholds": "", - "title": "Network", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Name", + "multi": false, + "name": "statefulset", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / StatefulSets", + "uid": "a31c1f46e6f727cb37c0d731a7245005", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-statefulset + namespace: monitoring +- apiVersion: v1 + data: + workload-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, - "height": "100px", + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, "panels": [ { + "aliasColors": { + + }, + "breakpoint": "50%", "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "combine": { + "label": "Others", + "threshold": 0 }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 11 }, - "id": 5, + "id": 6, "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "tableColumn": "", + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", "refId": "A" } ], - "thresholds": "", - "title": "Desired Replicas", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", "valueName": "current" }, { + "aliasColors": { + + }, + "breakpoint": "50%", "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "combine": { + "label": "Others", + "threshold": 0 }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 11 }, - "id": 6, + "id": 7, "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "tableColumn": "", + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", "refId": "A" } ], - "thresholds": "", - "title": "Replicas of current version", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 8, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [ + { + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 22 }, - "id": 7, - "interval": null, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Observed Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 22 }, - "id": 8, - "interval": null, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Metadata Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 14, "panels": [ { "aliasColors": { @@ -20698,16 +31948,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 23 }, - "id": 9, + "id": 15, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -20716,11 +31971,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -20730,43 +31987,116 @@ items: ], "spaceLength": 10, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas specified", - "refId": "A" - }, - { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas created", - "refId": "B" - }, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "ready", - "refId": "C" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas of current version", - "refId": "D" - }, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "E" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -20774,10 +32104,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Replicas", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -20792,19 +32122,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -20813,13 +32143,17 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" } ], - "schemaVersion": 14, + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" @@ -20827,39 +32161,61 @@ items: "templating": { "list": [ { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "kube-system", + "value": "kube-system" }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, + "includeAll": true, "label": null, - "name": "datasource", + "multi": false, + "name": "namespace", "options": [ ], - "query": "prometheus", + "query": "label_values(container_network_receive_packets_total, namespace)", "refresh": 1, "regex": "", - "type": "datasource" + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "", + "value": "" }, - "datasource": "$datasource", - "hide": 2, + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", + "hide": 0, "includeAll": false, - "label": "cluster", + "label": null, "multi": false, - "name": "cluster", + "name": "workload", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation, cluster)", - "refresh": 2, + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", + "refresh": 1, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -20870,21 +32226,27 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "deployment", + "value": "deployment" }, - "datasource": "$datasource", + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": null, "multi": false, - "name": "namespace", + "name": "type", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", - "refresh": 2, + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [ @@ -20896,28 +32258,82 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", + "datasource": "prometheus", "hide": 0, "includeAll": false, - "label": "Name", + "label": null, "multi": false, - "name": "statefulset", + "name": "resolution", "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] @@ -20952,12 +32368,12 @@ items: ] }, "timezone": "", - "title": "Kubernetes / StatefulSets", - "uid": "a31c1f46e6f727cb37c0d731a7245005", + "title": "Kubernetes / Networking / Workload", + "uid": "728bf77cc1166d2f3133bf25846876cc", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-statefulset + name: grafana-dashboard-workload-total namespace: monitoring kind: ConfigMapList diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index e4bd4d734242e9e9b180855fb02f2ead12b2d9de..aec58617ea8312dc7538f759193528ec0bca7839 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -45,6 +45,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/apiserver name: grafana-dashboard-apiserver readOnly: false + - mountPath: /grafana-dashboard-definitions/0/cluster-total + name: grafana-dashboard-cluster-total + readOnly: false - mountPath: /grafana-dashboard-definitions/0/controller-manager name: grafana-dashboard-controller-manager readOnly: false @@ -69,6 +72,12 @@ spec: - mountPath: /grafana-dashboard-definitions/0/kubelet name: grafana-dashboard-kubelet readOnly: false + - mountPath: /grafana-dashboard-definitions/0/namespace-by-pod + name: grafana-dashboard-namespace-by-pod + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/namespace-by-workload + name: grafana-dashboard-namespace-by-workload + readOnly: false - mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use name: grafana-dashboard-node-cluster-rsrc-use readOnly: false @@ -81,6 +90,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage name: grafana-dashboard-persistentvolumesusage readOnly: false + - mountPath: /grafana-dashboard-definitions/0/pod-total + name: grafana-dashboard-pod-total + readOnly: false - mountPath: /grafana-dashboard-definitions/0/pods name: grafana-dashboard-pods readOnly: false @@ -99,6 +111,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/statefulset name: grafana-dashboard-statefulset readOnly: false + - mountPath: /grafana-dashboard-definitions/0/workload-total + name: grafana-dashboard-workload-total + readOnly: false nodeSelector: beta.kubernetes.io/os: linux securityContext: @@ -117,6 +132,9 @@ spec: - configMap: name: grafana-dashboard-apiserver name: grafana-dashboard-apiserver + - configMap: + name: grafana-dashboard-cluster-total + name: grafana-dashboard-cluster-total - configMap: name: grafana-dashboard-controller-manager name: grafana-dashboard-controller-manager @@ -141,6 +159,12 @@ spec: - configMap: name: grafana-dashboard-kubelet name: grafana-dashboard-kubelet + - configMap: + name: grafana-dashboard-namespace-by-pod + name: grafana-dashboard-namespace-by-pod + - configMap: + name: grafana-dashboard-namespace-by-workload + name: grafana-dashboard-namespace-by-workload - configMap: name: grafana-dashboard-node-cluster-rsrc-use name: grafana-dashboard-node-cluster-rsrc-use @@ -153,6 +177,9 @@ spec: - configMap: name: grafana-dashboard-persistentvolumesusage name: grafana-dashboard-persistentvolumesusage + - configMap: + name: grafana-dashboard-pod-total + name: grafana-dashboard-pod-total - configMap: name: grafana-dashboard-pods name: grafana-dashboard-pods @@ -171,3 +198,6 @@ spec: - configMap: name: grafana-dashboard-statefulset name: grafana-dashboard-statefulset + - configMap: + name: grafana-dashboard-workload-total + name: grafana-dashboard-workload-total diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml index 065e87035254dbd8480cf19caa32fdcf1cef8482..b8ff279504f709eac7c466d67550a0f468d262a0 100644 --- a/manifests/kube-state-metrics-deployment.yaml +++ b/manifests/kube-state-metrics-deployment.yaml @@ -55,7 +55,7 @@ spec: - --port=8081 - --telemetry-host=127.0.0.1 - --telemetry-port=8082 - image: quay.io/coreos/kube-state-metrics:v1.7.2 + image: quay.io/coreos/kube-state-metrics:v1.8.0 name: kube-state-metrics resources: limits: diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml index 0a6ce3b11c8455d6b1fd48ca5fce06c8093d4654..436c462e43d9d1ab3c59884d96775016a23acbdc 100644 --- a/manifests/node-exporter-daemonset.yaml +++ b/manifests/node-exporter-daemonset.yaml @@ -61,7 +61,7 @@ spec: resources: limits: cpu: 20m - memory: 60Mi + memory: 40Mi requests: cpu: 10m memory: 20Mi diff --git a/manifests/0prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml similarity index 83% rename from manifests/0prometheus-operator-serviceMonitor.yaml rename to manifests/prometheus-operator-serviceMonitor.yaml index 828c9c6a3c6583e9b16aae3e4fdbfe3302d219e2..be1642054971f9941d8b04b889c5d3c4be77c8e1 100644 --- a/manifests/0prometheus-operator-serviceMonitor.yaml +++ b/manifests/prometheus-operator-serviceMonitor.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator namespace: monitoring spec: @@ -15,4 +15,4 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 63a606c4afd9144da926039b722b7ff1fdec0c32..93334a9e3c1641a88804f66fcb4b4766c33eeffb 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -37,12 +37,8 @@ spec: ) record: instance:node_memory_utilisation:ratio - expr: | - ( - rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + - rate(node_vmstat_pgpgout{job="node-exporter"}[1m]) - ) - record: instance:node_memory_swap_io_pages:rate1m + rate(node_vmstat_pgmajfault{job="node-exporter"}[1m]) + record: instance:node_vmstat_pgmajfault:rate1m - expr: | rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) record: instance_device:node_disk_io_time_seconds:rate1m @@ -69,6 +65,23 @@ spec: rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m]) ) record: instance:node_network_transmit_drop_excluding_lo:rate1m + - name: kube-apiserver.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + labels: + quantile: "0.99" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + labels: + quantile: "0.9" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + labels: + quantile: "0.5" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - name: k8s.rules rules: - expr: | @@ -100,14 +113,14 @@ spec: record: namespace:container_memory_usage_bytes:sum - expr: | sum by (namespace, label_name) ( - sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)) by (namespace, pod) * on (namespace, pod) group_left(label_name) kube_pod_labels{job="kube-state-metrics"} ) record: namespace:kube_pod_container_resource_requests_memory_bytes:sum - expr: | sum by (namespace, label_name) ( - sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)) by (namespace, pod) * on (namespace, pod) group_left(label_name) kube_pod_labels{job="kube-state-metrics"} ) @@ -192,23 +205,6 @@ spec: labels: quantile: "0.5" record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - name: kube-apiserver.rules - rules: - - expr: | - histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) - labels: - quantile: "0.9" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) - labels: - quantile: "0.5" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - name: node.rules rules: - expr: sum(min(kube_pod_info) by (node)) @@ -224,8 +220,16 @@ spec: )) record: node:node_num_cpu:sum - expr: | - sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - record: :node_memory_MemFreeCachedBuffers_bytes:sum + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + record: :node_memory_MemAvailable_bytes:sum - name: kube-prometheus-node-recording.rules rules: - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY @@ -403,98 +407,6 @@ spec: for: 1h labels: severity: warning - - name: kubernetes-absent - rules: - - alert: AlertmanagerDown - annotations: - message: Alertmanager has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown - expr: | - absent(up{job="alertmanager-main",namespace="monitoring"} == 1) - for: 15m - labels: - severity: critical - - alert: CoreDNSDown - annotations: - message: CoreDNS has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-corednsdown - expr: | - absent(up{job="kube-dns"} == 1) - for: 15m - labels: - severity: critical - - alert: KubeAPIDown - annotations: - message: KubeAPI has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown - expr: | - absent(up{job="apiserver"} == 1) - for: 15m - labels: - severity: critical - - alert: KubeControllerManagerDown - annotations: - message: KubeControllerManager has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown - expr: | - absent(up{job="kube-controller-manager"} == 1) - for: 15m - labels: - severity: critical - - alert: KubeSchedulerDown - annotations: - message: KubeScheduler has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown - expr: | - absent(up{job="kube-scheduler"} == 1) - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsDown - annotations: - message: KubeStateMetrics has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown - expr: | - absent(up{job="kube-state-metrics"} == 1) - for: 15m - labels: - severity: critical - - alert: KubeletDown - annotations: - message: Kubelet has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown - expr: | - absent(up{job="kubelet"} == 1) - for: 15m - labels: - severity: critical - - alert: NodeExporterDown - annotations: - message: NodeExporter has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown - expr: | - absent(up{job="node-exporter"} == 1) - for: 15m - labels: - severity: critical - - alert: PrometheusDown - annotations: - message: Prometheus has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown - expr: | - absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) - for: 15m - labels: - severity: critical - - alert: PrometheusOperatorDown - annotations: - message: PrometheusOperator has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown - expr: | - absent(up{job="prometheus-operator",namespace="monitoring"} == 1) - for: 15m - labels: - severity: critical - name: kubernetes-apps rules: - alert: KubePodCrashLooping @@ -599,6 +511,16 @@ spec: for: 15m labels: severity: critical + - alert: KubeContainerWaiting + annotations: + message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} + has been in waiting state for longer than 1 hour. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting + expr: | + sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 + for: 1h + labels: + severity: warning - alert: KubeDaemonSetNotScheduled annotations: message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset @@ -789,7 +711,7 @@ spec: ) < 0.15 and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 - for: 5m + for: 1h labels: severity: critical - alert: KubePersistentVolumeErrors @@ -804,15 +726,6 @@ spec: severity: critical - name: kubernetes-system rules: - - alert: KubeNodeNotReady - annotations: - message: '{{ $labels.node }} has been unready for more than 15 minutes.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready - expr: | - kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 15m - labels: - severity: warning - alert: KubeVersionMismatch annotations: message: There are {{ $value }} different semantic versions of Kubernetes @@ -836,23 +749,15 @@ spec: for: 15m labels: severity: warning - - alert: KubeletTooManyPods - annotations: - message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage - }} of its Pod capacity. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods - expr: | - max(max(kubelet_running_pod_count{job="kubelet"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 0.95 - for: 15m - labels: - severity: warning + - name: kubernetes-system-apiserver + rules: - alert: KubeAPILatencyHigh annotations: message: The API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"} > 1 for: 10m labels: severity: warning @@ -862,7 +767,7 @@ spec: for {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"} > 4 for: 10m labels: severity: critical @@ -872,7 +777,7 @@ spec: }} of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) + sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) / sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.03 for: 10m @@ -884,7 +789,7 @@ spec: }} of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) + sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) / sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.01 for: 10m @@ -897,7 +802,7 @@ spec: }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb) / sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10 for: 10m @@ -910,7 +815,7 @@ spec: }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb) / sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05 for: 10m @@ -934,6 +839,75 @@ spec: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 labels: severity: critical + - alert: KubeAPIDown + annotations: + message: KubeAPI has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown + expr: | + absent(up{job="apiserver"} == 1) + for: 15m + labels: + severity: critical + - name: kubernetes-system-kubelet + rules: + - alert: KubeNodeNotReady + annotations: + message: '{{ $labels.node }} has been unready for more than 15 minutes.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready + expr: | + kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 15m + labels: + severity: warning + - alert: KubeNodeUnreachable + annotations: + message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable + expr: | + kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1 + labels: + severity: warning + - alert: KubeletTooManyPods + annotations: + message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage + }} of its Pod capacity. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods + expr: | + max(max(kubelet_running_pod_count{job="kubelet"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 0.95 + for: 15m + labels: + severity: warning + - alert: KubeletDown + annotations: + message: Kubelet has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown + expr: | + absent(up{job="kubelet"} == 1) + for: 15m + labels: + severity: critical + - name: kubernetes-system-scheduler + rules: + - alert: KubeSchedulerDown + annotations: + message: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown + expr: | + absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical + - name: kubernetes-system-controller-manager + rules: + - alert: KubeControllerManagerDown + annotations: + message: KubeControllerManager has disappeared from Prometheus target discovery. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown + expr: | + absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical - name: prometheus rules: - alert: PrometheusBadConfig @@ -1101,8 +1075,8 @@ spec: - alert: PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write - desired shards calculation wants to run {{ printf $value }} shards, which - is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` + desired shards calculation wants to run {{ $value }} shards, which is more + than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}. summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. @@ -1111,7 +1085,7 @@ spec: # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. ( max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m]) - > on(job, instance) group_right + > max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m]) ) for: 15m @@ -1171,8 +1145,8 @@ spec: rules: - alert: TargetDown annotations: - message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets are - down.' + message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets in + {{ $labels.namespace }} namespace are down.' expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 for: 10m diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml index 91da377a476c45bd2df296add7eaab6a64ee07b1..64edb0e490c924c28b3c20f4857cb1c50b9501ea 100644 --- a/manifests/prometheus-serviceMonitorKubelet.yaml +++ b/manifests/prometheus-serviceMonitorKubelet.yaml @@ -11,6 +11,10 @@ spec: honorLabels: true interval: 30s port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path scheme: https tlsConfig: insecureSkipVerify: true @@ -24,6 +28,10 @@ spec: - __name__ path: /metrics/cadvisor port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path scheme: https tlsConfig: insecureSkipVerify: true diff --git a/manifests/00namespace-namespace.yaml b/manifests/setup/0namespace-namespace.yaml similarity index 100% rename from manifests/00namespace-namespace.yaml rename to manifests/setup/0namespace-namespace.yaml diff --git a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml similarity index 89% rename from manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml rename to manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml index 43822f9ebe5b2733092779922fc1601fe56d4b8b..17dfb1604c391e63139184b0b8aba41a8a636328 100644 --- a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml @@ -15,16 +15,16 @@ spec: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string spec: description: 'AlertmanagerSpec is a specification of the desired behavior - of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status' properties: additionalPeers: description: AdditionalPeers allows injecting a set of additional Alertmanagers @@ -626,6 +626,12 @@ spec: items: type: string type: array + configSecret: + description: ConfigSecret is the name of a Kubernetes Secret in the + same namespace as the Alertmanager object, which contains configuration + for this Alertmanager instance. Defaults to 'alertmanager-<alertmanager-name>' + The secret is mounted into /etc/alertmanager/config. + type: string containers: description: Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager @@ -1051,7 +1057,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -1203,7 +1210,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -1352,7 +1360,124 @@ spec: and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in + PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. This field is alpha-level and it is + only honored by servers that enable the WindowsRunAsUserName + feature flag. + type: string + type: object + type: object + startupProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer type: object stdin: description: Whether this container should allocate a buffer for @@ -1918,7 +2043,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -2070,7 +2196,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -2219,8 +2346,125 @@ spec: and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in + PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. This field is alpha-level and it is + only honored by servers that enable the WindowsRunAsUserName + feature flag. + type: string type: object type: object + startupProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object stdin: description: Whether this container should allocate a buffer for stdin in the container runtime. If this is not set, reads from @@ -2397,179 +2641,13 @@ spec: If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). - Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency type: string generation: description: A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. format: int64 type: integer - initializers: - description: Initializers tracks the progress of initialization. - properties: - pending: - description: Pending is a list of initializers that must execute - in order before this object is visible. When the last pending - initializer is removed, and no failing result is set, the - initializers struct will be set to nil and the object is considered - as initialized and visible to all clients. - items: - description: Initializer is information about an initializer - that has not yet completed. - properties: - name: - description: name of the process that is responsible for - initializing this object. - type: string - required: - - name - type: object - type: array - result: - description: Status is a return value for calls that don't return - other objects. - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of - this representation of an object. Servers should convert - recognized schemas to the latest internal value, and may - reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - code: - description: Suggested HTTP return code for this status, - 0 if not set. - format: int32 - type: integer - details: - description: StatusDetails is a set of additional properties - that MAY be set by the server to provide additional information - about a response. The Reason field of a Status object - defines what attributes will be set. Clients must ignore - fields that do not match the defined type of each attribute, - and should assume that any attribute may be empty, invalid, - or under defined. - properties: - causes: - description: The Causes array includes more details - associated with the StatusReason failure. Not all - StatusReasons may provide detailed causes. - items: - description: StatusCause provides more information - about an api.Status failure, including cases when - multiple errors are encountered. - properties: - field: - description: |- - The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. - - Examples: - "name" - the field "name" on the current resource - "items[0].name" - the field "name" on the first array entry in "items" - type: string - message: - description: A human-readable description of the - cause of the error. This field may be presented - as-is to a reader. - type: string - reason: - description: A machine-readable description of - the cause of the error. If this value is empty - there is no information available. - type: string - type: object - type: array - group: - description: The group attribute of the resource associated - with the status StatusReason. - type: string - kind: - description: 'The kind attribute of the resource associated - with the status StatusReason. On some operations may - differ from the requested resource Kind. More info: - https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - name: - description: The name attribute of the resource associated - with the status StatusReason (when there is a single - name which can be described). - type: string - retryAfterSeconds: - description: If specified, the time in seconds before - the operation should be retried. Some errors may indicate - the client must take an alternate action - for those - errors this field may indicate how long to wait before - taking the alternate action. - format: int32 - type: integer - uid: - description: 'UID of the resource. (when there is a - single resource which can be described). More info: - http://kubernetes.io/docs/user-guide/identifiers#uids' - type: string - type: object - kind: - description: 'Kind is a string value representing the REST - resource this object represents. Servers may infer this - from the endpoint the client submits requests to. Cannot - be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: A human-readable description of the status - of this operation. - type: string - metadata: - description: ListMeta describes metadata that synthetic - resources must have, including lists and various status - objects. A resource may have only one of {ObjectMeta, - ListMeta}. - properties: - continue: - description: continue may be set if the user set a limit - on the number of items returned, and indicates that - the server has more data available. The value is opaque - and may be used to issue another request to the endpoint - that served this list to retrieve the next set of - available objects. Continuing a consistent list may - not be possible if the server configuration has changed - or more than a few minutes have passed. The resourceVersion - field returned when using this continue value will - be identical to the value in the first response, unless - you have received this token from an error message. - type: string - remainingItemCount: - description: |- - remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. - - This field is alpha and can be changed or removed without notice. - format: int64 - type: integer - resourceVersion: - description: 'String that identifies the server''s internal - version of this object that can be used by clients - to determine when objects have changed. Value must - be treated as opaque by clients and passed unmodified - back to the server. Populated by the system. Read-only. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' - type: string - selfLink: - description: selfLink is a URL representing this object. - Populated by the system. Read-only. - type: string - type: object - reason: - description: A machine-readable description of why this - operation is in the "Failure" status. If this value is - empty there is no information available. A Reason clarifies - an HTTP status code but does not override it. - type: string - status: - description: 'Status of the operation. One of: "Success" - or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' - type: string - type: object - required: - - pending - type: object labels: description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match @@ -2577,10 +2655,13 @@ spec: http://kubernetes.io/docs/user-guide/labels' type: object managedFields: - description: |- - ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. - - This field is alpha and can be changed or removed without notice. + description: ManagedFields maps workflow-id and version to the set + of fields that are managed by that workflow. This is mostly for + internal housekeeping, and users typically shouldn't need to set + or understand this field. A workflow can be the user's name, a + controller's name, or the name of a specific apply path like "ci-cd". + The set of fields is always in the version that the workflow used + when modifying the object. items: description: ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset applies @@ -2593,9 +2674,18 @@ spec: to track the version of a field set because it cannot be automatically converted. type: string - fields: - description: 'Fields stores a set of fields in a data structure - like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + fieldsType: + description: 'FieldsType is the discriminator for the different + fields format and version. There is currently only one possible + value: "FieldsV1"' + type: string + fieldsV1: + description: |- + FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. + + Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. + + The exact format is defined in sigs.k8s.io/structured-merge-diff type: object manager: description: Manager is an identifier of the workflow managing @@ -2654,7 +2744,7 @@ spec: controller. type: boolean kind: - description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' @@ -2673,11 +2763,13 @@ spec: description: |- An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. - Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency type: string selfLink: - description: SelfLink is a URL representing this object. Populated - by the system. Read-only. + description: |- + SelfLink is a URL representing this object. Populated by the system. Read-only. + + DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release. type: string uid: description: |- @@ -2835,6 +2927,15 @@ spec: credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint of + the container process. Defaults to the user specified in image + metadata if unspecified. May also be set in PodSecurityContext. + If set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. This + field is alpha-level and it is only honored by servers that + enable the WindowsRunAsUserName feature flag. + type: string type: object type: object serviceAccountName: @@ -2872,13 +2973,13 @@ spec: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized - values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string metadata: description: ObjectMeta is metadata that all persisted resources @@ -2934,190 +3035,13 @@ spec: If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). - Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency type: string generation: description: A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. format: int64 type: integer - initializers: - description: Initializers tracks the progress of initialization. - properties: - pending: - description: Pending is a list of initializers that - must execute in order before this object is visible. - When the last pending initializer is removed, and - no failing result is set, the initializers struct - will be set to nil and the object is considered as - initialized and visible to all clients. - items: - description: Initializer is information about an initializer - that has not yet completed. - properties: - name: - description: name of the process that is responsible - for initializing this object. - type: string - required: - - name - type: object - type: array - result: - description: Status is a return value for calls that - don't return other objects. - properties: - apiVersion: - description: 'APIVersion defines the versioned schema - of this representation of an object. Servers should - convert recognized schemas to the latest internal - value, and may reject unrecognized values. More - info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - code: - description: Suggested HTTP return code for this - status, 0 if not set. - format: int32 - type: integer - details: - description: StatusDetails is a set of additional - properties that MAY be set by the server to provide - additional information about a response. The Reason - field of a Status object defines what attributes - will be set. Clients must ignore fields that do - not match the defined type of each attribute, - and should assume that any attribute may be empty, - invalid, or under defined. - properties: - causes: - description: The Causes array includes more - details associated with the StatusReason failure. - Not all StatusReasons may provide detailed - causes. - items: - description: StatusCause provides more information - about an api.Status failure, including cases - when multiple errors are encountered. - properties: - field: - description: |- - The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. - - Examples: - "name" - the field "name" on the current resource - "items[0].name" - the field "name" on the first array entry in "items" - type: string - message: - description: A human-readable description - of the cause of the error. This field - may be presented as-is to a reader. - type: string - reason: - description: A machine-readable description - of the cause of the error. If this value - is empty there is no information available. - type: string - type: object - type: array - group: - description: The group attribute of the resource - associated with the status StatusReason. - type: string - kind: - description: 'The kind attribute of the resource - associated with the status StatusReason. On - some operations may differ from the requested - resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - name: - description: The name attribute of the resource - associated with the status StatusReason (when - there is a single name which can be described). - type: string - retryAfterSeconds: - description: If specified, the time in seconds - before the operation should be retried. Some - errors may indicate the client must take an - alternate action - for those errors this field - may indicate how long to wait before taking - the alternate action. - format: int32 - type: integer - uid: - description: 'UID of the resource. (when there - is a single resource which can be described). - More info: http://kubernetes.io/docs/user-guide/identifiers#uids' - type: string - type: object - kind: - description: 'Kind is a string value representing - the REST resource this object represents. Servers - may infer this from the endpoint the client submits - requests to. Cannot be updated. In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: A human-readable description of the - status of this operation. - type: string - metadata: - description: ListMeta describes metadata that synthetic - resources must have, including lists and various - status objects. A resource may have only one of - {ObjectMeta, ListMeta}. - properties: - continue: - description: continue may be set if the user - set a limit on the number of items returned, - and indicates that the server has more data - available. The value is opaque and may be - used to issue another request to the endpoint - that served this list to retrieve the next - set of available objects. Continuing a consistent - list may not be possible if the server configuration - has changed or more than a few minutes have - passed. The resourceVersion field returned - when using this continue value will be identical - to the value in the first response, unless - you have received this token from an error - message. - type: string - remainingItemCount: - description: |- - remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. - - This field is alpha and can be changed or removed without notice. - format: int64 - type: integer - resourceVersion: - description: 'String that identifies the server''s - internal version of this object that can be - used by clients to determine when objects - have changed. Value must be treated as opaque - by clients and passed unmodified back to the - server. Populated by the system. Read-only. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' - type: string - selfLink: - description: selfLink is a URL representing - this object. Populated by the system. Read-only. - type: string - type: object - reason: - description: A machine-readable description of why - this operation is in the "Failure" status. If - this value is empty there is no information available. - A Reason clarifies an HTTP status code but does - not override it. - type: string - status: - description: 'Status of the operation. One of: "Success" - or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' - type: string - type: object - required: - - pending - type: object labels: description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. @@ -3125,10 +3049,14 @@ spec: More info: http://kubernetes.io/docs/user-guide/labels' type: object managedFields: - description: |- - ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. - - This field is alpha and can be changed or removed without notice. + description: ManagedFields maps workflow-id and version + to the set of fields that are managed by that workflow. + This is mostly for internal housekeeping, and users typically + shouldn't need to set or understand this field. A workflow + can be the user's name, a controller's name, or the name + of a specific apply path like "ci-cd". The set of fields + is always in the version that the workflow used when modifying + the object. items: description: ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset @@ -3141,10 +3069,18 @@ spec: field. It is necessary to track the version of a field set because it cannot be automatically converted. type: string - fields: - description: 'Fields stores a set of fields in a data - structure like a Trie. To understand how this is - used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + fieldsType: + description: 'FieldsType is the discriminator for + the different fields format and version. There is + currently only one possible value: "FieldsV1"' + type: string + fieldsV1: + description: |- + FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. + + Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. + + The exact format is defined in sigs.k8s.io/structured-merge-diff type: object manager: description: Manager is an identifier of the workflow @@ -3208,7 +3144,7 @@ spec: managing controller. type: boolean kind: - description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' @@ -3227,11 +3163,13 @@ spec: description: |- An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. - Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency type: string selfLink: - description: SelfLink is a URL representing this object. - Populated by the system. Read-only. + description: |- + SelfLink is a URL representing this object. Populated by the system. Read-only. + + DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release. type: string uid: description: |- @@ -3597,7 +3535,7 @@ spec: properties: monitors: description: 'Required: Monitors is a collection of Ceph monitors - More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' items: type: string type: array @@ -3608,11 +3546,11 @@ spec: readOnly: description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More - info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: boolean secretFile: description: 'Optional: SecretFile is the path to key ring - for User, default is /etc/ceph/user.secret More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + for User, default is /etc/ceph/user.secret More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string secretRef: description: LocalObjectReference contains enough information @@ -3625,7 +3563,7 @@ spec: type: object user: description: 'Optional: User is the rados user name, default - is admin More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + is admin More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string required: - monitors @@ -3640,12 +3578,12 @@ spec: description: 'Filesystem type to mount. Must be a filesystem type supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string readOnly: description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More - info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: boolean secretRef: description: LocalObjectReference contains enough information @@ -3657,8 +3595,8 @@ spec: type: string type: object volumeID: - description: 'volume id used to identify the volume in cinder - More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + description: 'volume id used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string required: - volumeID @@ -3981,16 +3919,16 @@ spec: properties: endpoints: description: 'EndpointsName is the endpoint name that details - Glusterfs topology. More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + Glusterfs topology. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string path: description: 'Path is the Glusterfs volume path. More info: - https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string readOnly: description: 'ReadOnly here will force the Glusterfs volume to be mounted with read-only permissions. Defaults to false. - More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: boolean required: - endpoints @@ -4437,24 +4375,24 @@ spec: https://kubernetes.io/docs/concepts/storage/volumes#rbd' type: string image: - description: 'The rados image name. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + description: 'The rados image name. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string keyring: description: 'Keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + Default is /etc/ceph/keyring. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string monitors: - description: 'A collection of Ceph monitors. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + description: 'A collection of Ceph monitors. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' items: type: string type: array pool: description: 'The rados pool name. Default is rbd. More info: - https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string readOnly: description: 'ReadOnly here will force the ReadOnly setting - in VolumeMounts. Defaults to false. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + in VolumeMounts. Defaults to false. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: boolean secretRef: description: LocalObjectReference contains enough information @@ -4467,7 +4405,7 @@ spec: type: object user: description: 'The rados user name. Default is admin. More - info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string required: - monitors @@ -4654,7 +4592,7 @@ spec: status: description: 'AlertmanagerStatus is the most recent observed status of the Alertmanager cluster. Read-only. Not included when requesting from the - apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status' properties: availableReplicas: description: Total number of available pods (ready for at least minReadySeconds) diff --git a/manifests/0prometheus-operator-0podmonitorCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml similarity index 96% rename from manifests/0prometheus-operator-0podmonitorCustomResourceDefinition.yaml rename to manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml index ff67150faded62d607e8d8902b9a4e6185f9ce74..8062b3dda8c082ca8eb51b17dd332063e94c5066 100644 --- a/manifests/0prometheus-operator-0podmonitorCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml @@ -15,12 +15,12 @@ spec: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string spec: description: PodMonitorSpec contains specification parameters for a PodMonitor. @@ -52,6 +52,10 @@ spec: description: HonorLabels chooses the metric's labels on collisions with target labels. type: boolean + honorTimestamps: + description: HonorTimestamps controls whether Prometheus respects + the timestamps present in scraped data. + type: boolean interval: description: Interval at which metrics should be scraped type: string diff --git a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml similarity index 90% rename from manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml rename to manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml index bcf97c2c784f79201b5b83f3007b8166648dfeb3..9c450819cc7245655484b350efaca762bf020394 100644 --- a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -15,12 +15,12 @@ spec: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string spec: description: 'PrometheusSpec is a specification of the desired behavior @@ -693,18 +693,40 @@ spec: tlsConfig: description: TLSConfig specifies TLS configuration parameters. properties: + ca: {} caFile: - description: The CA cert to use for the targets. + description: Path to the CA cert in the Prometheus container + to use for the targets. type: string + cert: {} certFile: - description: The client cert file for the targets. + description: Path to the client cert file in the Prometheus + container for the targets. type: string insecureSkipVerify: description: Disable target certificate validation. type: boolean keyFile: - description: The client key file for the targets. + description: Path to the client key file in the Prometheus + container for the targets. type: string + keySecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object serverName: description: Used to verify the hostname for the targets. type: string @@ -774,18 +796,40 @@ spec: tlsConfig: description: TLSConfig specifies TLS configuration parameters. properties: + ca: {} caFile: - description: The CA cert to use for the targets. + description: Path to the CA cert in the Prometheus container + to use for the targets. type: string + cert: {} certFile: - description: The client cert file for the targets. + description: Path to the client cert file in the Prometheus + container for the targets. type: string insecureSkipVerify: description: Disable target certificate validation. type: boolean keyFile: - description: The client key file for the targets. + description: Path to the client key file in the Prometheus container + for the targets. type: string + keySecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object serverName: description: Used to verify the hostname for the targets. type: string @@ -793,6 +837,7 @@ spec: required: - host type: object + arbitraryFSAccessThroughSMs: {} baseImage: description: Base image to use for a Prometheus deployment. type: string @@ -1235,7 +1280,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -1387,7 +1433,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -1536,7 +1583,124 @@ spec: and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in + PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. This field is alpha-level and it is + only honored by servers that enable the WindowsRunAsUserName + feature flag. + type: string + type: object + type: object + startupProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer type: object stdin: description: Whether this container should allocate a buffer for @@ -1655,6 +1819,11 @@ spec: authentication authorization via a proxy to ensure only clients authorized to perform these actions can do so. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis' type: boolean + enforcedNamespaceLabel: + description: EnforcedNamespaceLabel enforces adding a namespace label + of origin for each alert and metric that is user created. The label + value will always be the namespace of the object that is being created. + type: string evaluationInterval: description: Interval between consecutive evaluations. type: string @@ -1667,6 +1836,12 @@ spec: under. This is necessary to generate correct URLs. This is necessary if Prometheus is not served from root of a DNS name. type: string + ignoreNamespaceSelectors: + description: IgnoreNamespaceSelectors if set to true will ignore NamespaceSelector + settings from the podmonitor and servicemonitor configs, and they + will only discover endpoints within their current namespace. Defaults + to false. + type: boolean image: description: Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure @@ -2117,7 +2292,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -2269,7 +2445,8 @@ spec: successThreshold: description: Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to - 1. Must be 1 for liveness. Minimum value is 1. + 1. Must be 1 for liveness and startup. Minimum value is + 1. format: int32 type: integer tcpSocket: @@ -2418,8 +2595,125 @@ spec: and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in + PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. This field is alpha-level and it is + only honored by servers that enable the WindowsRunAsUserName + feature flag. + type: string type: object type: object + startupProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object stdin: description: Whether this container should allocate a buffer for stdin in the container runtime. If this is not set, reads from @@ -2542,6 +2836,15 @@ spec: nodeSelector: description: Define which Nodes the Pods are scheduled on. type: object + overrideHonorLabels: + description: OverrideHonorLabels if set to true overrides all user configured + honor_labels. If HonorLabels is set in ServiceMonitor or PodMonitor + to true, this overrides honor_labels to false. + type: boolean + overrideHonorTimestamps: + description: OverrideHonorTimestamps allows to globally enforce honoring + timestamps in all scrape configs. + type: boolean paused: description: When a Prometheus deployment is paused, no actions except for deletion will be performed on the underlying objects. @@ -2595,179 +2898,13 @@ spec: If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). - Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency type: string generation: description: A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. format: int64 type: integer - initializers: - description: Initializers tracks the progress of initialization. - properties: - pending: - description: Pending is a list of initializers that must execute - in order before this object is visible. When the last pending - initializer is removed, and no failing result is set, the - initializers struct will be set to nil and the object is considered - as initialized and visible to all clients. - items: - description: Initializer is information about an initializer - that has not yet completed. - properties: - name: - description: name of the process that is responsible for - initializing this object. - type: string - required: - - name - type: object - type: array - result: - description: Status is a return value for calls that don't return - other objects. - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of - this representation of an object. Servers should convert - recognized schemas to the latest internal value, and may - reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - code: - description: Suggested HTTP return code for this status, - 0 if not set. - format: int32 - type: integer - details: - description: StatusDetails is a set of additional properties - that MAY be set by the server to provide additional information - about a response. The Reason field of a Status object - defines what attributes will be set. Clients must ignore - fields that do not match the defined type of each attribute, - and should assume that any attribute may be empty, invalid, - or under defined. - properties: - causes: - description: The Causes array includes more details - associated with the StatusReason failure. Not all - StatusReasons may provide detailed causes. - items: - description: StatusCause provides more information - about an api.Status failure, including cases when - multiple errors are encountered. - properties: - field: - description: |- - The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. - - Examples: - "name" - the field "name" on the current resource - "items[0].name" - the field "name" on the first array entry in "items" - type: string - message: - description: A human-readable description of the - cause of the error. This field may be presented - as-is to a reader. - type: string - reason: - description: A machine-readable description of - the cause of the error. If this value is empty - there is no information available. - type: string - type: object - type: array - group: - description: The group attribute of the resource associated - with the status StatusReason. - type: string - kind: - description: 'The kind attribute of the resource associated - with the status StatusReason. On some operations may - differ from the requested resource Kind. More info: - https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - name: - description: The name attribute of the resource associated - with the status StatusReason (when there is a single - name which can be described). - type: string - retryAfterSeconds: - description: If specified, the time in seconds before - the operation should be retried. Some errors may indicate - the client must take an alternate action - for those - errors this field may indicate how long to wait before - taking the alternate action. - format: int32 - type: integer - uid: - description: 'UID of the resource. (when there is a - single resource which can be described). More info: - http://kubernetes.io/docs/user-guide/identifiers#uids' - type: string - type: object - kind: - description: 'Kind is a string value representing the REST - resource this object represents. Servers may infer this - from the endpoint the client submits requests to. Cannot - be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: A human-readable description of the status - of this operation. - type: string - metadata: - description: ListMeta describes metadata that synthetic - resources must have, including lists and various status - objects. A resource may have only one of {ObjectMeta, - ListMeta}. - properties: - continue: - description: continue may be set if the user set a limit - on the number of items returned, and indicates that - the server has more data available. The value is opaque - and may be used to issue another request to the endpoint - that served this list to retrieve the next set of - available objects. Continuing a consistent list may - not be possible if the server configuration has changed - or more than a few minutes have passed. The resourceVersion - field returned when using this continue value will - be identical to the value in the first response, unless - you have received this token from an error message. - type: string - remainingItemCount: - description: |- - remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. - - This field is alpha and can be changed or removed without notice. - format: int64 - type: integer - resourceVersion: - description: 'String that identifies the server''s internal - version of this object that can be used by clients - to determine when objects have changed. Value must - be treated as opaque by clients and passed unmodified - back to the server. Populated by the system. Read-only. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' - type: string - selfLink: - description: selfLink is a URL representing this object. - Populated by the system. Read-only. - type: string - type: object - reason: - description: A machine-readable description of why this - operation is in the "Failure" status. If this value is - empty there is no information available. A Reason clarifies - an HTTP status code but does not override it. - type: string - status: - description: 'Status of the operation. One of: "Success" - or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' - type: string - type: object - required: - - pending - type: object labels: description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match @@ -2775,10 +2912,13 @@ spec: http://kubernetes.io/docs/user-guide/labels' type: object managedFields: - description: |- - ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. - - This field is alpha and can be changed or removed without notice. + description: ManagedFields maps workflow-id and version to the set + of fields that are managed by that workflow. This is mostly for + internal housekeeping, and users typically shouldn't need to set + or understand this field. A workflow can be the user's name, a + controller's name, or the name of a specific apply path like "ci-cd". + The set of fields is always in the version that the workflow used + when modifying the object. items: description: ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset applies @@ -2791,9 +2931,18 @@ spec: to track the version of a field set because it cannot be automatically converted. type: string - fields: - description: 'Fields stores a set of fields in a data structure - like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + fieldsType: + description: 'FieldsType is the discriminator for the different + fields format and version. There is currently only one possible + value: "FieldsV1"' + type: string + fieldsV1: + description: |- + FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. + + Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. + + The exact format is defined in sigs.k8s.io/structured-merge-diff type: object manager: description: Manager is an identifier of the workflow managing @@ -2852,7 +3001,7 @@ spec: controller. type: boolean kind: - description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' @@ -2871,11 +3020,13 @@ spec: description: |- An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. - Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency type: string selfLink: - description: SelfLink is a URL representing this object. Populated - by the system. Read-only. + description: |- + SelfLink is a URL representing this object. Populated by the system. Read-only. + + DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release. type: string uid: description: |- @@ -3076,18 +3227,40 @@ spec: tlsConfig: description: TLSConfig specifies TLS configuration parameters. properties: + ca: {} caFile: - description: The CA cert to use for the targets. + description: Path to the CA cert in the Prometheus container + to use for the targets. type: string + cert: {} certFile: - description: The client cert file for the targets. + description: Path to the client cert file in the Prometheus + container for the targets. type: string insecureSkipVerify: description: Disable target certificate validation. type: boolean keyFile: - description: The client key file for the targets. + description: Path to the client key file in the Prometheus + container for the targets. type: string + keySecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object serverName: description: Used to verify the hostname for the targets. type: string @@ -3202,18 +3375,40 @@ spec: tlsConfig: description: TLSConfig specifies TLS configuration parameters. properties: + ca: {} caFile: - description: The CA cert to use for the targets. + description: Path to the CA cert in the Prometheus container + to use for the targets. type: string + cert: {} certFile: - description: The client cert file for the targets. + description: Path to the client cert file in the Prometheus + container for the targets. type: string insecureSkipVerify: description: Disable target certificate validation. type: boolean keyFile: - description: The client key file for the targets. + description: Path to the client key file in the Prometheus + container for the targets. type: string + keySecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object serverName: description: Used to verify the hostname for the targets. type: string @@ -3530,6 +3725,15 @@ spec: credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag. type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint of + the container process. Defaults to the user specified in image + metadata if unspecified. May also be set in PodSecurityContext. + If set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. This + field is alpha-level and it is only honored by servers that + enable the WindowsRunAsUserName feature flag. + type: string type: object type: object serviceAccountName: @@ -3655,13 +3859,13 @@ spec: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized - values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string metadata: description: ObjectMeta is metadata that all persisted resources @@ -3717,190 +3921,13 @@ spec: If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). - Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency type: string generation: description: A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. format: int64 type: integer - initializers: - description: Initializers tracks the progress of initialization. - properties: - pending: - description: Pending is a list of initializers that - must execute in order before this object is visible. - When the last pending initializer is removed, and - no failing result is set, the initializers struct - will be set to nil and the object is considered as - initialized and visible to all clients. - items: - description: Initializer is information about an initializer - that has not yet completed. - properties: - name: - description: name of the process that is responsible - for initializing this object. - type: string - required: - - name - type: object - type: array - result: - description: Status is a return value for calls that - don't return other objects. - properties: - apiVersion: - description: 'APIVersion defines the versioned schema - of this representation of an object. Servers should - convert recognized schemas to the latest internal - value, and may reject unrecognized values. More - info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - code: - description: Suggested HTTP return code for this - status, 0 if not set. - format: int32 - type: integer - details: - description: StatusDetails is a set of additional - properties that MAY be set by the server to provide - additional information about a response. The Reason - field of a Status object defines what attributes - will be set. Clients must ignore fields that do - not match the defined type of each attribute, - and should assume that any attribute may be empty, - invalid, or under defined. - properties: - causes: - description: The Causes array includes more - details associated with the StatusReason failure. - Not all StatusReasons may provide detailed - causes. - items: - description: StatusCause provides more information - about an api.Status failure, including cases - when multiple errors are encountered. - properties: - field: - description: |- - The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. - - Examples: - "name" - the field "name" on the current resource - "items[0].name" - the field "name" on the first array entry in "items" - type: string - message: - description: A human-readable description - of the cause of the error. This field - may be presented as-is to a reader. - type: string - reason: - description: A machine-readable description - of the cause of the error. If this value - is empty there is no information available. - type: string - type: object - type: array - group: - description: The group attribute of the resource - associated with the status StatusReason. - type: string - kind: - description: 'The kind attribute of the resource - associated with the status StatusReason. On - some operations may differ from the requested - resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - name: - description: The name attribute of the resource - associated with the status StatusReason (when - there is a single name which can be described). - type: string - retryAfterSeconds: - description: If specified, the time in seconds - before the operation should be retried. Some - errors may indicate the client must take an - alternate action - for those errors this field - may indicate how long to wait before taking - the alternate action. - format: int32 - type: integer - uid: - description: 'UID of the resource. (when there - is a single resource which can be described). - More info: http://kubernetes.io/docs/user-guide/identifiers#uids' - type: string - type: object - kind: - description: 'Kind is a string value representing - the REST resource this object represents. Servers - may infer this from the endpoint the client submits - requests to. Cannot be updated. In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: A human-readable description of the - status of this operation. - type: string - metadata: - description: ListMeta describes metadata that synthetic - resources must have, including lists and various - status objects. A resource may have only one of - {ObjectMeta, ListMeta}. - properties: - continue: - description: continue may be set if the user - set a limit on the number of items returned, - and indicates that the server has more data - available. The value is opaque and may be - used to issue another request to the endpoint - that served this list to retrieve the next - set of available objects. Continuing a consistent - list may not be possible if the server configuration - has changed or more than a few minutes have - passed. The resourceVersion field returned - when using this continue value will be identical - to the value in the first response, unless - you have received this token from an error - message. - type: string - remainingItemCount: - description: |- - remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. - - This field is alpha and can be changed or removed without notice. - format: int64 - type: integer - resourceVersion: - description: 'String that identifies the server''s - internal version of this object that can be - used by clients to determine when objects - have changed. Value must be treated as opaque - by clients and passed unmodified back to the - server. Populated by the system. Read-only. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' - type: string - selfLink: - description: selfLink is a URL representing - this object. Populated by the system. Read-only. - type: string - type: object - reason: - description: A machine-readable description of why - this operation is in the "Failure" status. If - this value is empty there is no information available. - A Reason clarifies an HTTP status code but does - not override it. - type: string - status: - description: 'Status of the operation. One of: "Success" - or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' - type: string - type: object - required: - - pending - type: object labels: description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. @@ -3908,10 +3935,14 @@ spec: More info: http://kubernetes.io/docs/user-guide/labels' type: object managedFields: - description: |- - ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. - - This field is alpha and can be changed or removed without notice. + description: ManagedFields maps workflow-id and version + to the set of fields that are managed by that workflow. + This is mostly for internal housekeeping, and users typically + shouldn't need to set or understand this field. A workflow + can be the user's name, a controller's name, or the name + of a specific apply path like "ci-cd". The set of fields + is always in the version that the workflow used when modifying + the object. items: description: ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset @@ -3924,10 +3955,18 @@ spec: field. It is necessary to track the version of a field set because it cannot be automatically converted. type: string - fields: - description: 'Fields stores a set of fields in a data - structure like a Trie. To understand how this is - used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + fieldsType: + description: 'FieldsType is the discriminator for + the different fields format and version. There is + currently only one possible value: "FieldsV1"' + type: string + fieldsV1: + description: |- + FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. + + Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. + + The exact format is defined in sigs.k8s.io/structured-merge-diff type: object manager: description: Manager is an identifier of the workflow @@ -3991,7 +4030,7 @@ spec: managing controller. type: boolean kind: - description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' @@ -4010,11 +4049,13 @@ spec: description: |- An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. - Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency type: string selfLink: - description: SelfLink is a URL representing this object. - Populated by the system. Read-only. + description: |- + SelfLink is a URL representing this object. Populated by the system. Read-only. + + DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release. type: string uid: description: |- @@ -4210,6 +4251,10 @@ spec: to ensure the Prometheus Operator knows what version of Thanos is being configured. type: string + listenLocal: + description: ListenLocal makes the Thanos sidecar listen on loopback, + so that it does not bind against the Pod IP. + type: boolean objectStorageConfig: description: SecretKeySelector selects a key of a Secret. properties: @@ -4398,7 +4443,7 @@ spec: properties: monitors: description: 'Required: Monitors is a collection of Ceph monitors - More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' items: type: string type: array @@ -4409,11 +4454,11 @@ spec: readOnly: description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More - info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: boolean secretFile: description: 'Optional: SecretFile is the path to key ring - for User, default is /etc/ceph/user.secret More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + for User, default is /etc/ceph/user.secret More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string secretRef: description: LocalObjectReference contains enough information @@ -4426,7 +4471,7 @@ spec: type: object user: description: 'Optional: User is the rados user name, default - is admin More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it' + is admin More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string required: - monitors @@ -4441,12 +4486,12 @@ spec: description: 'Filesystem type to mount. Must be a filesystem type supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string readOnly: description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More - info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: boolean secretRef: description: LocalObjectReference contains enough information @@ -4458,8 +4503,8 @@ spec: type: string type: object volumeID: - description: 'volume id used to identify the volume in cinder - More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md' + description: 'volume id used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string required: - volumeID @@ -4782,16 +4827,16 @@ spec: properties: endpoints: description: 'EndpointsName is the endpoint name that details - Glusterfs topology. More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + Glusterfs topology. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string path: description: 'Path is the Glusterfs volume path. More info: - https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string readOnly: description: 'ReadOnly here will force the Glusterfs volume to be mounted with read-only permissions. Defaults to false. - More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod' + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: boolean required: - endpoints @@ -5238,24 +5283,24 @@ spec: https://kubernetes.io/docs/concepts/storage/volumes#rbd' type: string image: - description: 'The rados image name. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + description: 'The rados image name. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string keyring: description: 'Keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + Default is /etc/ceph/keyring. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string monitors: - description: 'A collection of Ceph monitors. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + description: 'A collection of Ceph monitors. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' items: type: string type: array pool: description: 'The rados pool name. Default is rbd. More info: - https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string readOnly: description: 'ReadOnly here will force the ReadOnly setting - in VolumeMounts. Defaults to false. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + in VolumeMounts. Defaults to false. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: boolean secretRef: description: LocalObjectReference contains enough information @@ -5268,7 +5313,7 @@ spec: type: object user: description: 'The rados user name. Default is admin. More - info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it' + info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string required: - monitors @@ -5459,7 +5504,7 @@ spec: status: description: 'PrometheusStatus is the most recent observed status of the Prometheus cluster. Read-only. Not included when requesting from the apiserver, - only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status' properties: availableReplicas: description: Total number of available pods (ready for at least minReadySeconds) diff --git a/manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml similarity index 50% rename from manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml rename to manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml index b48488510b2bbb0b5b63a510ae6824b11b5c736b..49ddbfb86c62498788e4f66e96b61cc7b9f693be 100644 --- a/manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml @@ -15,12 +15,12 @@ spec: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string metadata: description: ObjectMeta is metadata that all persisted resources must have, @@ -70,186 +70,26 @@ spec: If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). - Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency type: string generation: description: A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. format: int64 type: integer - initializers: - description: Initializers tracks the progress of initialization. - properties: - pending: - description: Pending is a list of initializers that must execute - in order before this object is visible. When the last pending - initializer is removed, and no failing result is set, the initializers - struct will be set to nil and the object is considered as initialized - and visible to all clients. - items: - description: Initializer is information about an initializer that - has not yet completed. - properties: - name: - description: name of the process that is responsible for initializing - this object. - type: string - required: - - name - type: object - type: array - result: - description: Status is a return value for calls that don't return - other objects. - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this - representation of an object. Servers should convert recognized - schemas to the latest internal value, and may reject unrecognized - values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - code: - description: Suggested HTTP return code for this status, 0 if - not set. - format: int32 - type: integer - details: - description: StatusDetails is a set of additional properties - that MAY be set by the server to provide additional information - about a response. The Reason field of a Status object defines - what attributes will be set. Clients must ignore fields that - do not match the defined type of each attribute, and should - assume that any attribute may be empty, invalid, or under - defined. - properties: - causes: - description: The Causes array includes more details associated - with the StatusReason failure. Not all StatusReasons may - provide detailed causes. - items: - description: StatusCause provides more information about - an api.Status failure, including cases when multiple - errors are encountered. - properties: - field: - description: |- - The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. - - Examples: - "name" - the field "name" on the current resource - "items[0].name" - the field "name" on the first array entry in "items" - type: string - message: - description: A human-readable description of the cause - of the error. This field may be presented as-is - to a reader. - type: string - reason: - description: A machine-readable description of the - cause of the error. If this value is empty there - is no information available. - type: string - type: object - type: array - group: - description: The group attribute of the resource associated - with the status StatusReason. - type: string - kind: - description: 'The kind attribute of the resource associated - with the status StatusReason. On some operations may differ - from the requested resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - name: - description: The name attribute of the resource associated - with the status StatusReason (when there is a single name - which can be described). - type: string - retryAfterSeconds: - description: If specified, the time in seconds before the - operation should be retried. Some errors may indicate - the client must take an alternate action - for those errors - this field may indicate how long to wait before taking - the alternate action. - format: int32 - type: integer - uid: - description: 'UID of the resource. (when there is a single - resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids' - type: string - type: object - kind: - description: 'Kind is a string value representing the REST resource - this object represents. Servers may infer this from the endpoint - the client submits requests to. Cannot be updated. In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: A human-readable description of the status of this - operation. - type: string - metadata: - description: ListMeta describes metadata that synthetic resources - must have, including lists and various status objects. A resource - may have only one of {ObjectMeta, ListMeta}. - properties: - continue: - description: continue may be set if the user set a limit - on the number of items returned, and indicates that the - server has more data available. The value is opaque and - may be used to issue another request to the endpoint that - served this list to retrieve the next set of available - objects. Continuing a consistent list may not be possible - if the server configuration has changed or more than a - few minutes have passed. The resourceVersion field returned - when using this continue value will be identical to the - value in the first response, unless you have received - this token from an error message. - type: string - remainingItemCount: - description: |- - remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. - - This field is alpha and can be changed or removed without notice. - format: int64 - type: integer - resourceVersion: - description: 'String that identifies the server''s internal - version of this object that can be used by clients to - determine when objects have changed. Value must be treated - as opaque by clients and passed unmodified back to the - server. Populated by the system. Read-only. More info: - https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' - type: string - selfLink: - description: selfLink is a URL representing this object. - Populated by the system. Read-only. - type: string - type: object - reason: - description: A machine-readable description of why this operation - is in the "Failure" status. If this value is empty there is - no information available. A Reason clarifies an HTTP status - code but does not override it. - type: string - status: - description: 'Status of the operation. One of: "Success" or - "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' - type: string - type: object - required: - - pending - type: object labels: description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels' type: object managedFields: - description: |- - ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. - - This field is alpha and can be changed or removed without notice. + description: ManagedFields maps workflow-id and version to the set of + fields that are managed by that workflow. This is mostly for internal + housekeeping, and users typically shouldn't need to set or understand + this field. A workflow can be the user's name, a controller's name, + or the name of a specific apply path like "ci-cd". The set of fields + is always in the version that the workflow used when modifying the + object. items: description: ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset applies to. @@ -261,9 +101,18 @@ spec: the version of a field set because it cannot be automatically converted. type: string - fields: - description: 'Fields stores a set of fields in a data structure - like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + fieldsType: + description: 'FieldsType is the discriminator for the different + fields format and version. There is currently only one possible + value: "FieldsV1"' + type: string + fieldsV1: + description: |- + FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. + + Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. + + The exact format is defined in sigs.k8s.io/structured-merge-diff type: object manager: description: Manager is an identifier of the workflow managing @@ -321,7 +170,7 @@ spec: description: If true, this reference points to the managing controller. type: boolean kind: - description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' @@ -340,11 +189,13 @@ spec: description: |- An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. - Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency type: string selfLink: - description: SelfLink is a URL representing this object. Populated by - the system. Read-only. + description: |- + SelfLink is a URL representing this object. Populated by the system. Read-only. + + DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release. type: string uid: description: |- diff --git a/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml similarity index 84% rename from manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml rename to manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml index 75ccc6735e0991b8dccb85f203d4bb50a68cb516..5283e87d5188dabd91ba33549fb9503f315573d4 100644 --- a/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml @@ -15,12 +15,12 @@ spec: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string spec: description: ServiceMonitorSpec contains specification parameters for a @@ -74,10 +74,31 @@ spec: bearerTokenFile: description: File to read bearer token for scraping targets. type: string + bearerTokenSecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key must be + defined + type: boolean + required: + - key + type: object honorLabels: description: HonorLabels chooses the metric's labels on collisions with target labels. type: boolean + honorTimestamps: + description: HonorTimestamps controls whether Prometheus respects + the timestamps present in scraped data. + type: boolean interval: description: Interval at which metrics should be scraped type: string @@ -199,18 +220,40 @@ spec: tlsConfig: description: TLSConfig specifies TLS configuration parameters. properties: + ca: {} caFile: - description: The CA cert to use for the targets. + description: Path to the CA cert in the Prometheus container + to use for the targets. type: string + cert: {} certFile: - description: The client cert file for the targets. + description: Path to the client cert file in the Prometheus + container for the targets. type: string insecureSkipVerify: description: Disable target certificate validation. type: boolean keyFile: - description: The client key file for the targets. + description: Path to the client key file in the Prometheus + container for the targets. type: string + keySecret: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object serverName: description: Used to verify the hostname for the targets. type: string diff --git a/manifests/0prometheus-operator-clusterRole.yaml b/manifests/setup/prometheus-operator-clusterRole.yaml similarity index 96% rename from manifests/0prometheus-operator-clusterRole.yaml rename to manifests/setup/prometheus-operator-clusterRole.yaml index 28379ea309e7c7efdfa5fdb7d75b3149c224472c..bbb37a0f1ed7f2aa5a4fd4144c4c1023cf792f13 100644 --- a/manifests/0prometheus-operator-clusterRole.yaml +++ b/manifests/setup/prometheus-operator-clusterRole.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator rules: - apiGroups: diff --git a/manifests/0prometheus-operator-clusterRoleBinding.yaml b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml similarity index 90% rename from manifests/0prometheus-operator-clusterRoleBinding.yaml rename to manifests/setup/prometheus-operator-clusterRoleBinding.yaml index 41113ff6ce633136df4b239a4163b2c619742701..fc8b1b45e009d19b52fa7d75f6c6963625bf591e 100644 --- a/manifests/0prometheus-operator-clusterRoleBinding.yaml +++ b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/0prometheus-operator-deployment.yaml b/manifests/setup/prometheus-operator-deployment.yaml similarity index 88% rename from manifests/0prometheus-operator-deployment.yaml rename to manifests/setup/prometheus-operator-deployment.yaml index b95de48916ca66303073bf938cb210a7a19aa422..533c20cd869790edaa5bd09b47bf484d0f9244e8 100644 --- a/manifests/0prometheus-operator-deployment.yaml +++ b/manifests/setup/prometheus-operator-deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator namespace: monitoring spec: @@ -18,15 +18,15 @@ spec: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 spec: containers: - args: - --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 - - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.33.0 - image: quay.io/coreos/prometheus-operator:v0.33.0 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.34.0 + image: quay.io/coreos/prometheus-operator:v0.34.0 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/0prometheus-operator-service.yaml b/manifests/setup/prometheus-operator-service.yaml similarity index 90% rename from manifests/0prometheus-operator-service.yaml rename to manifests/setup/prometheus-operator-service.yaml index 68f06c991edb8034cc1a601886a17deb94faa964..7bd218369c256ffd71c0215b00bfae73b28c0508 100644 --- a/manifests/0prometheus-operator-service.yaml +++ b/manifests/setup/prometheus-operator-service.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator namespace: monitoring spec: diff --git a/manifests/0prometheus-operator-serviceAccount.yaml b/manifests/setup/prometheus-operator-serviceAccount.yaml similarity index 83% rename from manifests/0prometheus-operator-serviceAccount.yaml rename to manifests/setup/prometheus-operator-serviceAccount.yaml index d74f8ec2bd462cc184b2eccfc1ab5709b4f78ca9..d100bb15327f4d165ff3f9920abd4f8dc3ea52c4 100644 --- a/manifests/0prometheus-operator-serviceAccount.yaml +++ b/manifests/setup/prometheus-operator-serviceAccount.yaml @@ -4,6 +4,6 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.33.0 + app.kubernetes.io/version: v0.34.0 name: prometheus-operator namespace: monitoring diff --git a/scripts/minikube-start-kvm.sh b/scripts/minikube-start-kvm.sh new file mode 100755 index 0000000000000000000000000000000000000000..da82a95cb1960567a69657dc0751f9bacc76510d --- /dev/null +++ b/scripts/minikube-start-kvm.sh @@ -0,0 +1,12 @@ +#!/bin/bash +minikube delete +minikube addons disable metrics-server +minikube start \ + --vm-driver=kvm2 \ + --kubernetes-version=v1.16.0 \ + --memory=6g \ + --bootstrapper=kubeadm \ + --extra-config=kubelet.authentication-token-webhook=true \ + --extra-config=kubelet.authorization-mode=Webhook \ + --extra-config=scheduler.address=0.0.0.0 \ + --extra-config=controller-manager.address=0.0.0.0 diff --git a/scripts/minikube-start.sh b/scripts/minikube-start.sh new file mode 100755 index 0000000000000000000000000000000000000000..eb29e860cf2a8419ea04e91b92707b12e5ed7e9d --- /dev/null +++ b/scripts/minikube-start.sh @@ -0,0 +1,11 @@ +#!/bin/bash +minikube delete +minikube addons disable metrics-server +minikube start \ + --kubernetes-version=v1.16.0 \ + --memory=6g \ + --bootstrapper=kubeadm \ + --extra-config=kubelet.authentication-token-webhook=true \ + --extra-config=kubelet.authorization-mode=Webhook \ + --extra-config=scheduler.address=0.0.0.0 \ + --extra-config=controller-manager.address=0.0.0.0 diff --git a/scripts/monitoring-deploy.sh b/scripts/monitoring-deploy.sh new file mode 100755 index 0000000000000000000000000000000000000000..7bc20905c658802d568c6535af4f80f784b656f3 --- /dev/null +++ b/scripts/monitoring-deploy.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# create namespace and CRDs +kubectl create -f manifests/setup + +# wait for CRD creation to complete +until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done + +# create monitoring components +kubectl create -f manifests/ + diff --git a/tests/e2e/travis-e2e.sh b/tests/e2e/travis-e2e.sh index aa32d668c2e8cf39524d9471ddd7be9bd29b7ff4..4efb2bd17d9f6abd6969f9e5089244f8adcf59f1 100755 --- a/tests/e2e/travis-e2e.sh +++ b/tests/e2e/travis-e2e.sh @@ -16,13 +16,13 @@ chmod +x kind ./kind create cluster export KUBECONFIG="$(./kind get kubeconfig-path)" -./kubectl apply -f manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml -./kubectl apply -f manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml -./kubectl apply -f manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml -./kubectl apply -f manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml +# create namespace, permissions, and CRDs +./kubectl create -f manifests/setup -# Wait for CRDs to be successfully registered -sleep 10 +# wait for CRD creation to complete +until ./kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done + +# create monitoring components +./kubectl create -f manifests/ -./kubectl apply -f manifests make test-e2e