diff --git a/assets/prometheus/rules/etcd2.rules b/assets/prometheus/rules/etcd2.rules index 4a38894e2463e20620895a8ddd6b52c3b2110ad9..10fa5e8d7e3fdb4f03d8138f192f038f2508df42 100644 --- a/assets/prometheus/rules/etcd2.rules +++ b/assets/prometheus/rules/etcd2.rules @@ -29,7 +29,7 @@ ALERT HighNumberOfFailedHTTPRequests # alert if more than 5% of requests to an HTTP endpoint have failed with a non 4xx response ALERT HighNumberOfFailedHTTPRequests - IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.05 FOR 5m LABELS { diff --git a/hack/cluster-monitoring/deploy b/hack/cluster-monitoring/deploy index 9ad91eb0ed85afbfc8f65f23bef4125257eea9cf..bcb6a42e429a4da457185dc45e575688053e953e 100755 --- a/hack/cluster-monitoring/deploy +++ b/hack/cluster-monitoring/deploy @@ -32,6 +32,12 @@ kctl apply -f manifests/prometheus/prometheus-k8s-service.yaml kctl apply -f manifests/alertmanager/alertmanager-config.yaml kctl apply -f manifests/alertmanager/alertmanager-service.yaml +# unfortunately statefulsets cannot be changed except for their replica count +# so we need to make sure that the rule files are created before we create the +# prometheus resource so it can properly discover the rule files when creating +# the statefulset +sleep 5 + # `kubectl apply` is currently not working for third party resources so we are # using `kubectl create` here for the time being. # (https://github.com/kubernetes/kubernetes/issues/29542) diff --git a/hack/scripts/generate-alertmanager-config-secret.sh b/hack/scripts/generate-alertmanager-config-secret.sh new file mode 100755 index 0000000000000000000000000000000000000000..b0b4aaef77eb4467a51b2b979cd916b41ceed798 --- /dev/null +++ b/hack/scripts/generate-alertmanager-config-secret.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +cat <<-EOF +apiVersion: v1 +kind: Secret +metadata: + name: alertmanager-main +data: + alertmanager.yaml: $(cat assets/alertmanager/alertmanager.yaml | base64 --wrap=0) +EOF + diff --git a/hack/scripts/generate-dashboards-configmap.sh b/hack/scripts/generate-dashboards-configmap.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e21600e81b4070e23045c51bbad1aa87b3dc0d6 --- /dev/null +++ b/hack/scripts/generate-dashboards-configmap.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +cat <<-EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards +data: +EOF + +for f in assets/grafana/* +do + echo " $(basename $f): |+" + cat $f | sed "s/^/ /g" +done diff --git a/hack/scripts/generate-manifests.sh b/hack/scripts/generate-manifests.sh index d031b3a4d65a182f11f52fe368570a39b77c273e..bf5f42fa6be1e7f86108f94cebe13faa654f5d25 100755 --- a/hack/scripts/generate-manifests.sh +++ b/hack/scripts/generate-manifests.sh @@ -1,11 +1,11 @@ #!/bin/bash # Generate Alert Rules ConfigMap -kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml +hack/scripts/generate-rules-configmap.sh > manifests/prometheus/prometheus-k8s-rules.yaml # Generate Dashboard ConfigMap -kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-dashboards.yaml +hack/scripts/generate-dashboards-configmap.sh > manifests/grafana/grafana-dashboards.yaml # Generate Secret for Alertmanager config -kubectl create secret generic 
alertmanager-main --dry-run --from-file=assets/alertmanager/alertmanager.yaml -oyaml > manifests/alertmanager/alertmanager-config.yaml +hack/scripts/generate-alertmanager-config-secret.sh > manifests/alertmanager/alertmanager-config.yaml diff --git a/hack/scripts/generate-rules-configmap.sh b/hack/scripts/generate-rules-configmap.sh new file mode 100755 index 0000000000000000000000000000000000000000..b8e00fef88bb9f258b38d59ea207846e421d2b4e --- /dev/null +++ b/hack/scripts/generate-rules-configmap.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +cat <<-EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-k8s-rules + labels: + role: prometheus-rulefiles + prometheus: k8s +data: +EOF + +for f in assets/prometheus/rules/*.rules +do + echo " $(basename $f): |+" + cat $f | sed "s/^/ /g" +done diff --git a/manifests/alertmanager/alertmanager-config.yaml b/manifests/alertmanager/alertmanager-config.yaml index 49f8c3c49fd99142bb4beed0628b9726b8d0d6ed..eee36b33fae8b956c5dd7c5ef9c95dd7e3432c4d 100644 --- a/manifests/alertmanager/alertmanager-config.yaml +++ b/manifests/alertmanager/alertmanager-config.yaml @@ -1,7 +1,6 @@ apiVersion: v1 -data: - alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnd2ViaG9vaycKcmVjZWl2ZXJzOgotIG5hbWU6ICd3ZWJob29rJwogIHdlYmhvb2tfY29uZmlnczoKICAtIHVybDogJ2h0dHA6Ly9hbGVydG1hbmFnZXJ3aDozMDUwMC8nCg== kind: Secret metadata: - creationTimestamp: null name: alertmanager-main +data: + alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnd2ViaG9vaycKcmVjZWl2ZXJzOgotIG5hbWU6ICd3ZWJob29rJwogIHdlYmhvb2tfY29uZmlnczoKICAtIHVybDogJ2h0dHA6Ly9hbGVydG1hbmFnZXJ3aDozMDUwMC8nCg== diff --git a/manifests/grafana/grafana-dashboards.yaml b/manifests/grafana/grafana-dashboards.yaml index cba92d4972cf3f0fccefa915ad7de41d9c1bc033..15244d6153107e01a261c1b930802a5db3ce9f23 100644 --- a/manifests/grafana/grafana-dashboards.yaml +++ b/manifests/grafana/grafana-dashboards.yaml @@ -1,6 +1,9 @@ apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards data: - all-nodes-dashboard.json: | + all-nodes-dashboard.json: |+ { "dashboard": { @@ -861,7 +864,7 @@ data: ], "overwrite": true } - deployment-dashboard.json: |- + deployment-dashboard.json: |+ { "dashboard": { "__inputs": [ @@ -1678,8 +1681,7 @@ data: } ], "overwrite": true - } - kubernetes-pods-dashboard.json: | + } kubernetes-pods-dashboard.json: |+ { "dashboard": { "__inputs": [ @@ -2089,7 +2091,7 @@ data: ], "overwrite": true } - node-dashboard.json: | + node-dashboard.json: |+ { "dashboard": { @@ -2970,7 +2972,7 @@ data: ], "overwrite": true } - prometheus-datasource.json: | + prometheus-datasource.json: |+ { "access": "proxy", "basicAuth": false, @@ -2978,7 +2980,7 @@ data: "type": "prometheus", "url": "http://prometheus-k8s.monitoring.svc:9090" } - resource-requests-dashboard.json: |- + resource-requests-dashboard.json: |+ { "__inputs": [ { @@ -3402,8 +3404,4 @@ data: "timezone": "browser", "title": "Resource Requests", "version": 1 - } -kind: ConfigMap -metadata: - creationTimestamp: null - name: grafana-dashboards + } \ No newline at end of file diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml index 08f6dddceaeb8e450c766a12f072e537397acf47..7327b0db5a516a78447eba49d602b99af142c4d7 100644 --- 
a/manifests/prometheus/prometheus-k8s-rules.yaml +++ b/manifests/prometheus/prometheus-k8s-rules.yaml @@ -1,62 +1,138 @@ apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-k8s-rules + labels: + role: prometheus-rulefiles + prometheus: k8s data: - etcd2.rules: "### General cluster availability ###\n\n# alert if another failed - peer will result in an unavailable cluster\nALERT InsufficientPeers\n IF count(up{job=\"etcd-k8s\"} - == 0) > (count(up{job=\"etcd-k8s\"}) / 2 - 1)\n FOR 3m\n LABELS {\n severity - = \"critical\"\n }\n ANNOTATIONS {\n summary = \"Etcd cluster small\",\n - \ description = \"If one more etcd peer goes down the cluster will be unavailable\",\n - \ }\n\n### HTTP requests alerts ###\n\n# alert if more than 1% of requests to - an HTTP endpoint have failed with a non 4xx response\nALERT HighNumberOfFailedHTTPRequests\n - \ IF sum by(method) (rate(etcd_http_failed_total{job=\"etcd-k8s\", code!~\"4[0-9]{2}\"}[5m]))\n - \ / sum by(method) (rate(etcd_http_received_total{job=\"etcd-k8s\"}[5m])) > - 0.01\n FOR 10m\n LABELS {\n severity = \"warning\"\n }\n ANNOTATIONS {\n - \ summary = \"a high number of HTTP requests are failing\",\n description - = \"{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance - {{ $labels.instance }}\",\n }\n\n# alert if more than 5% of requests to an HTTP - endpoint have failed with a non 4xx response\nALERT HighNumberOfFailedHTTPRequests\n - \ IF sum by(method) (rate(etcd_http_failed_total{job=\"etcd-k8s\", code!~\"4[0-9]{2}\"}[5m])) - \n / sum by(method) (rate(etcd_http_received_total{job=\"etcd-k8s\"}[5m])) - > 0.05\n FOR 5m\n LABELS {\n severity = \"critical\"\n }\n ANNOTATIONS - {\n summary = \"a high number of HTTP requests are failing\",\n description - = \"{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance - {{ $labels.instance }}\",\n }\n\n# alert if 50% of requests get a 4xx response\nALERT - HighNumberOfFailedHTTPRequests\n IF sum by(method) (rate(etcd_http_failed_total{job=\"etcd-k8s\", - code=~\"4[0-9]{2}\"}[5m]))\n / sum by(method) (rate(etcd_http_received_total{job=\"etcd-k8s\"}[5m])) - > 0.5\n FOR 10m\n LABELS {\n severity = \"critical\"\n }\n ANNOTATIONS - {\n summary = \"a high number of HTTP requests are failing\",\n description - = \"{{ $value }}% of requests for {{ $labels.method }} failed with 4xx responses - on etcd instance {{ $labels.instance }}\",\n }\n\n# alert if the 99th percentile - of HTTP requests take more than 150ms\nALERT HTTPRequestsSlow\n IF histogram_quantile(0.99, - rate(etcd_http_successful_duration_second_bucket[5m])) > 0.15\n FOR 10m\n LABELS - {\n severity = \"warning\"\n }\n ANNOTATIONS {\n summary = \"slow HTTP - requests\",\n description = \"on ectd instance {{ $labels.instance }} HTTP - requests to {{ $label.method }} are slow\",\n }\n\n### File descriptor alerts - ###\n\ninstance:fd_utilization = process_open_fds / process_max_fds\n\n# alert - if file descriptors are likely to exhaust within the next 4 hours\nALERT FdExhaustionClose\n - \ IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1\n FOR 10m\n LABELS - {\n severity = \"warning\"\n }\n ANNOTATIONS {\n summary = \"file descriptors - soon exhausted\",\n description = \"{{ $labels.job }} instance {{ $labels.instance - }} will exhaust in file descriptors soon\",\n }\n\n# alert if file descriptors - are likely to exhaust within the next hour\nALERT FdExhaustionClose\n IF predict_linear(instance:fd_utilization[10m], - 3600) > 1\n FOR 10m\n LABELS {\n severity = \"critical\"\n }\n 
ANNOTATIONS - {\n summary = \"file descriptors soon exhausted\",\n description = \"{{ - $labels.job }} instance {{ $labels.instance }} will exhaust in file descriptors - soon\",\n }\n\n### etcd proposal alerts ###\n\n# alert if there are several failed - proposals within an hour\nALERT HighNumberOfFailedProposals\n IF increase(etcd_server_proposal_failed_total{job=\"etcd\"}[1h]) - > 5\n LABELS {\n severity = \"warning\"\n }\n ANNOTATIONS {\n summary - = \"a high number of failed proposals within the etcd cluster are happening\",\n - \ description = \"etcd instance {{ $labels.instance }} has seen {{ $value }} - proposal failures within the last hour\",\n }\n\n### etcd disk io latency alerts - ###\n\n# alert if 99th percentile of fsync durations is higher than 500ms\nALERT - HighFsyncDurations\n IF histogram_quantile(0.99, rate(etcd_wal_fsync_durations_seconds_bucket[5m])) - > 0.5\n FOR 10m\n LABELS {\n severity = \"warning\"\n }\n ANNOTATIONS {\n - \ summary = \"high fsync durations\",\n description = \"ectd instance {{ - $labels.instance }} fync durations are high\",\n }\n" + etcd2.rules: |+ + ### General cluster availability ### + + # alert if another failed peer will result in an unavailable cluster + ALERT InsufficientPeers + IF count(up{job="etcd-k8s"} == 0) > (count(up{job="etcd-k8s"}) / 2 - 1) + FOR 3m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "Etcd cluster small", + description = "If one more etcd peer goes down the cluster will be unavailable", + } + + ### HTTP requests alerts ### + + # alert if more than 1% of requests to an HTTP endpoint have failed with a non 4xx response + ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) + / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.01 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if more than 5% of requests to an HTTP endpoint have failed with a non 4xx response + ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) + / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.05 + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if 50% of requests get a 4xx response + ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code=~"4[0-9]{2}"}[5m])) + / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.5 + FOR 10m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed with 4xx responses on etcd instance {{ $labels.instance }}", + } + + # alert if the 99th percentile of HTTP requests take more than 150ms + ALERT HTTPRequestsSlow + IF histogram_quantile(0.99, rate(etcd_http_successful_duration_second_bucket[5m])) > 0.15 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "slow HTTP requests", + description = "on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow", + } + + 
### File descriptor alerts ### + + instance:fd_utilization = process_open_fds / process_max_fds + + # alert if file descriptors are likely to exhaust within the next 4 hours + ALERT FdExhaustionClose + IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "file descriptors soon exhausted", + description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon", + } + + # alert if file descriptors are likely to exhaust within the next hour + ALERT FdExhaustionClose + IF predict_linear(instance:fd_utilization[10m], 3600) > 1 + FOR 10m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "file descriptors soon exhausted", + description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon", + } + + ### etcd proposal alerts ### + + # alert if there are several failed proposals within an hour + ALERT HighNumberOfFailedProposals + IF increase(etcd_server_proposal_failed_total{job="etcd"}[1h]) > 5 + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of failed proposals within the etcd cluster are happening", + description = "etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour", + } + + ### etcd disk io latency alerts ### + + # alert if 99th percentile of fsync durations is higher than 500ms + ALERT HighFsyncDurations + IF histogram_quantile(0.99, rate(etcd_wal_fsync_durations_seconds_bucket[5m])) > 0.5 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "high fsync durations", + description = "etcd instance {{ $labels.instance }} fsync durations are high", + } kubernetes.rules: |+ # NOTE: These rules were kindly contributed by the SoundCloud engineering team. 
- + ### Container resources ### - + cluster_namespace_controller_pod_container:spec_memory_limit_bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -65,7 +141,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:spec_cpu_shares = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -74,7 +150,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:cpu_usage:rate = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -85,7 +161,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_usage:bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -94,7 +170,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_working_set:bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -103,7 +179,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_rss:bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -112,7 +188,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_cache:bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -121,7 +197,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:disk_usage:bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( label_replace( @@ -130,7 +206,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_pagefaults:rate = sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( label_replace( @@ -141,7 +217,7 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + cluster_namespace_controller_pod_container:memory_oom:rate = sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( label_replace( @@ -152,39 +228,39 @@ data: "pod_name", "^(.*)-[a-z0-9]+" ) ) - + ### Cluster resources ### - + cluster:memory_allocation:percent = 100 * sum by (cluster) ( container_spec_memory_limit_bytes{pod_name!=""} ) / sum by (cluster) ( machine_memory_bytes ) - + cluster:memory_used:percent = 100 * sum by (cluster) ( container_memory_usage_bytes{pod_name!=""} ) / sum by (cluster) ( machine_memory_bytes ) - + cluster:cpu_allocation:percent = 100 * sum by (cluster) ( container_spec_cpu_shares{pod_name!=""} ) / sum by (cluster) ( container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores ) - + cluster:node_cpu_use:percent = 100 * sum by (cluster) ( rate(node_cpu{mode!="idle"}[5m]) ) / sum by (cluster) ( machine_cpu_cores ) - + ### API latency ### - + # Raw metrics are in microseconds. Convert to seconds. 
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} = histogram_quantile( @@ -201,30 +277,30 @@ data: 0.5, sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) ) / 1e6 - + ### Scheduling latency ### - + cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} = histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} = histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} = histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 - + cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} = histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} = histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} = histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 - + cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} = histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} = histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} = histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 - + ALERT K8SNodeDown IF up{job="kubelet"} == 0 FOR 1h @@ -236,7 +312,7 @@ data: summary = "Kubelet cannot be scraped", description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour", } - + ALERT K8SNodeNotReady IF kube_node_status_ready{condition="true"} == 0 FOR 1h @@ -248,7 +324,7 @@ data: summary = "Node status is NotReady", description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour", } - + ALERT K8SManyNodesNotReady IF count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1 @@ -267,7 +343,7 @@ data: summary = "Many K8s nodes are Not Ready", description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.", } - + ALERT K8SKubeletNodeExporterDown IF up{job="node-exporter"} == 0 FOR 15m @@ -279,7 +355,7 @@ data: summary = "Kubelet node_exporter cannot be scraped", description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour.", } - + ALERT K8SKubeletDown IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1 FOR 1h @@ -291,7 +367,7 @@ data: summary = "Many Kubelets cannot be scraped", description = "Prometheus failed to scrape more than 10% of kubelets, or all Kubelets have disappeared from service discovery.", } - + ALERT K8SApiserverDown IF up{job="kubernetes"} == 0 FOR 15m @@ -303,7 +379,7 @@ data: summary = "API server unreachable", description = "An API server could not be scraped.", } - + # Disable for non HA kubernetes setups. 
ALERT K8SApiserverDown IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"})) @@ -316,7 +392,7 @@ data: summary = "API server unreachable", description = "Prometheus failed to scrape multiple API servers, or all API servers have disappeared from service discovery.", } - + ALERT K8SSchedulerDown IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0) FOR 5m @@ -328,7 +404,7 @@ data: summary = "Scheduler is down", description = "There is no running K8S scheduler. New pods are not being assigned to nodes.", } - + ALERT K8SControllerManagerDown IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0) FOR 5m @@ -340,7 +416,7 @@ data: summary = "Controller manager is down", description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.", } - + ALERT K8SConntrackTableFull IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50 FOR 10m @@ -352,7 +428,7 @@ data: summary = "Number of tracked connections is near the limit", description = "The nf_conntrack table is {{ $value }}% full.", } - + ALERT K8SConntrackTableFull IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 90 LABELS { @@ -363,7 +439,7 @@ data: summary = "Number of tracked connections is near the limit", description = "The nf_conntrack table is {{ $value }}% full.", } - + # To catch the conntrack sysctl de-tuning when it happens ALERT K8SConntrackTuningMissing IF node_nf_conntrack_udp_timeout > 10 @@ -376,7 +452,7 @@ data: summary = "Node does not have the correct conntrack tunings", description = "Nodes keep un-setting the correct tunings, investigate when it happens.", } - + ALERT K8STooManyOpenFiles IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50 FOR 10m @@ -388,7 +464,7 @@ data: summary = "{{ $labels.job }} has too many open file descriptors", description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.", } - + ALERT K8STooManyOpenFiles IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80 FOR 10m @@ -400,7 +476,7 @@ data: summary = "{{ $labels.job }} has too many open file descriptors", description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.", } - + # Some verbs excluded because they are expected to be long-lasting: # WATCHLIST is long-poll, CONNECT is `kubectl exec`. 
ALERT K8SApiServerLatency @@ -417,7 +493,7 @@ data: summary = "Kubernetes apiserver latency is high", description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.", } - + ALERT K8SApiServerEtcdAccessLatency IF etcd_request_latencies_summary{quantile="0.99"} / 1e6 > 1.0 FOR 15m @@ -429,7 +505,7 @@ data: summary = "Access to etcd is slow", description = "99th percentile latency for apiserver to access etcd is higher than 1s.", } - + ALERT K8SKubeletTooManyPods IF kubelet_running_pod_count > 100 LABELS { @@ -440,8 +516,4 @@ data: summary = "Kubelet is close to pod limit", description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110", } - -kind: ConfigMap -metadata: - creationTimestamp: null - name: prometheus-k8s-rules + diff --git a/manifests/prometheus/prometheus-k8s.yaml b/manifests/prometheus/prometheus-k8s.yaml index 9054ea5868dbe938faeca867548ad8dda6a8ee11..23156650eb1c3fc5176003e0770f1895b6de640f 100644 --- a/manifests/prometheus/prometheus-k8s.yaml +++ b/manifests/prometheus/prometheus-k8s.yaml @@ -10,6 +10,10 @@ spec: serviceMonitorSelector: matchExpression: - {key: k8s-apps, operator: Exists} + ruleSelector: + matchLabels: + role: prometheus-rulefiles + prometheus: k8s resources: requests: # 2Gi is default, but won't schedule if you don't have a node with >2Gi
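
Note on how the pieces above fit together: the rule files and dashboards are now rendered by the generator scripts instead of `kubectl create --dry-run`, and the Prometheus resource selects the rules ConfigMap through the new ruleSelector. A quick sanity check, as a sketch only (the "monitoring" namespace is an assumption; the namespace is not shown in this diff):

    # Regenerate the checked-in manifests from assets/ using the new helper scripts
    hack/scripts/generate-manifests.sh

    # The generated ConfigMap should declare the labels matched by spec.ruleSelector
    # in manifests/prometheus/prometheus-k8s.yaml (role=prometheus-rulefiles, prometheus=k8s)
    grep -A 3 'labels:' manifests/prometheus/prometheus-k8s-rules.yaml

    # After deploying, the same labels should be visible on the cluster object
    kubectl --namespace monitoring get configmap prometheus-k8s-rules --show-labels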
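
On the ordering comment in hack/cluster-monitoring/deploy: the fixed `sleep 5` only approximates "the rule ConfigMap exists before the Prometheus resource is created". An alternative guard, not part of this change and assuming `kctl` targets the same namespace as the other calls in that script, would be to poll for the ConfigMap instead of sleeping:

    # wait until the rule files are visible before creating the Prometheus resource,
    # so the operator can discover them when it builds the statefulset
    for i in $(seq 1 30); do
        kctl get configmap prometheus-k8s-rules > /dev/null 2>&1 && break
        sleep 1
    done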