diff --git a/README.md b/README.md index 3e9e6317562f21532fab5334d59f89e6ff45b694..65e6081ad8bd45883b48120cb6271e900d36e27a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ This stack is meant for cluster monitoring, so it is pre-configured to collect m * [Troubleshooting](#troubleshooting) * [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics) * [kube-state-metrics resource usage](#kube-state-metrics-resource-usage) +* [Contributing](#contributing) ## Prerequisites @@ -396,3 +397,15 @@ config. They default to: memoryPerNode: '30Mi', } ``` + +## Contributing + +All `.yaml` files in the `/manifests` folder are generated via +[Jsonnet](https://jsonnet.org/). Contributing changes will most likely include +the following process: + +1. Make your changes in the respective `*.jsonnet` file. +2. Commit your changes (This is currently necessary due to our vendoring + process. This is likely to change in the future). +3. Generate dependent `*.yaml` files: `make generate-in-docker`. +4. Commit the generated changes. diff --git a/docs/monitoring-other-namespaces.md b/docs/monitoring-other-namespaces.md new file mode 100644 index 0000000000000000000000000000000000000000..8327ed024b31b2da6afbcdf0ab0fb84738232404 --- /dev/null +++ b/docs/monitoring-other-namespaces.md @@ -0,0 +1,28 @@ +# Monitoring other Kubernetes Namespaces +This guide will help you monitor applications in other Namespaces. By default the RBAC rules are only created for the `default` and `kube-system` Namespaces, and for the Namespace the stack itself is installed into (`_config.namespace`). + +# Setup +You have to give the list of the Namespaces that you want to be able to monitor. +This is done in the variable `prometheus.namespaces` of `_config`. You usually set this in your `.jsonnet` file when building the manifests. 
+ +Example to create the needed `Role` and `RoleBinding` for the Namespace `foo`: +``` +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { + _config+:: { + namespace: 'monitoring', + + prometheus+:: { + namespaces: ["default", "kube-system","foo"], + }, + }, +}; + +{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + +{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +``` diff --git a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet index e84986f52f000b0487d7e1e2e22a80845896deb2..026acad04508cc97b92b75a3cac0cfeab4163591 100644 --- a/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet +++ b/jsonnet/kube-prometheus/prometheus/prometheus.libsonnet @@ -21,6 +21,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; replicas: 2, rules: {}, renderedRules: {}, + namespaces: ["default", "kube-system",$._config.namespace], }, }, @@ -55,16 +56,20 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; groups: $._config.prometheus.rules.groups, }, }, - roleBindingDefault: + roleBindingSpecificNamespaces: local roleBinding = k.rbac.v1.roleBinding; - roleBinding.new() + - roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.metadata.withNamespace('default') + - roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + - 
roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + - roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), + /* Builds the RoleBinding created in `namespace` that binds the per-namespace 'prometheus-<name>' Role to the Prometheus ServiceAccount. */ local newSpecificRoleBinding(namespace) = + roleBinding.new() + + roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + roleBinding.mixin.metadata.withNamespace(namespace) + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + + roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace /* subject stays in $._config.namespace like every other binding in this file; `namespace` only selects where the RoleBinding is created */ }]); + + local roleBindingList = k.rbac.v1.roleBindingList; + /* One RoleBinding per entry of $._config.prometheus.namespaces, emitted as a single list object. */ roleBindingList.new([newSpecificRoleBinding(x) for x in $._config.prometheus.namespaces]), clusterRole: local clusterRole = k.rbac.v1.clusterRole; local policyRule = clusterRole.rulesType; @@ -108,16 +113,6 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name + '-config') + roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), - roleBindingNamespace: - local roleBinding = k.rbac.v1.roleBinding; - - roleBinding.new() + - roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.metadata.withNamespace($._config.namespace) + - roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + - roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + - roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + 
$._config.prometheus.name, namespace: $._config.namespace }]), clusterRoleBinding: local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; @@ -127,10 +122,9 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; clusterRoleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), - roleKubeSystem: + roleSpecificNamespaces: local role = k.rbac.v1.role; local policyRule = role.rulesType; - local coreRule = policyRule.new() + policyRule.withApiGroups(['']) + policyRule.withResources([ @@ -140,57 +134,15 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 'pods', ]) + policyRule.withVerbs(['get', 'list', 'watch']); - - role.new() + - role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - role.mixin.metadata.withNamespace('kube-system') + - role.withRules(coreRule), - roleDefault: - local role = k.rbac.v1.role; - local policyRule = role.rulesType; - - local coreRule = policyRule.new() + - policyRule.withApiGroups(['']) + - policyRule.withResources([ - 'nodes', - 'services', - 'endpoints', - 'pods', - ]) + - policyRule.withVerbs(['get', 'list', 'watch']); - - role.new() + - role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - role.mixin.metadata.withNamespace('default') + - role.withRules(coreRule), - roleBindingKubeSystem: - local roleBinding = k.rbac.v1.roleBinding; - - roleBinding.new() + - roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.metadata.withNamespace('kube-system') + - roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + - roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) + - roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) + - roleBinding.withSubjects([{ kind: 
'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]), - roleNamespace: - local role = k.rbac.v1.role; - local policyRule = role.rulesType; - - local coreRule = policyRule.new() + - policyRule.withApiGroups(['']) + - policyRule.withResources([ - 'nodes', - 'services', - 'endpoints', - 'pods', - ]) + - policyRule.withVerbs(['get', 'list', 'watch']); - - role.new() + - role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + - role.mixin.metadata.withNamespace($._config.namespace) + - role.withRules(coreRule), + + local newSpecificRole(namespace) = + role.new() + + role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) + + role.mixin.metadata.withNamespace(namespace) + + role.withRules(coreRule); + + local roleList = k.rbac.v1.roleList; + roleList.new([newSpecificRole(x) for x in $._config.prometheus.namespaces]), prometheus: local container = k.core.v1.pod.mixin.spec.containersType; local resourceRequirements = container.mixin.resourcesType; diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 1143970e7e7f7bbb2e83ab42597672f93a222b84..e3fa61c7b6074fa319aaf3ed9257047801336b43 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -4792,7 +4792,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "100 - (avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[5m])) * 100)\n", + "expr": "1 - (avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[5m])))\n", "format": "time_series", "intervalFactor": 10, "legendFormat": "{{cpu}}", @@ -4822,18 +4822,18 @@ items: }, "yaxes": [ { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, - "max": 100, + "max": 1, "min": 0, "show": true }, { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, - "max": 100, + 
"max": 1, "min": 0, "show": true } @@ -4883,21 +4883,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"} * 100", + "expr": "max(node_load1{job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 1m", "refId": "A" }, { - "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"} * 100", + "expr": "max(node_load5{job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 5m", "refId": "B" }, { - "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"} * 100", + "expr": "max(node_load15{job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 15m", @@ -4927,7 +4927,7 @@ items: }, "yaxes": [ { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -4935,7 +4935,7 @@ items: "show": true }, { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -5002,28 +5002,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n", + "expr": "max(\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory used", "refId": "A" }, { - "expr": "node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "max(node_memory_Buffers{job=\"node-exporter\", 
instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory buffers", "refId": "B" }, { - "expr": "node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "max(node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory cached", "refId": "C" }, { - "expr": "node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "max(node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory free", @@ -5131,7 +5131,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "(\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n) * 100\n /\nnode_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n", + "expr": "max(\n (\n (\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "" @@ -5215,21 +5215,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_bytes_read{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "read", "refId": "A" }, { - "expr": "sum by (instance) (rate(node_disk_bytes_written{job=\"node-exporter\", 
instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_bytes_written{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "written", "refId": "B" }, { - "expr": "sum by (instance) (rate(node_disk_io_time_ms{job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_io_time_ms{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "io time", @@ -5414,7 +5414,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(node_network_receive_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])", + "expr": "max(rate(node_network_receive_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -5505,7 +5505,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(node_network_transmit_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])", + "expr": "max(rate(node_network_transmit_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}",