diff --git a/vertical-pod-autoscaler/e2e/go.mod b/vertical-pod-autoscaler/e2e/go.mod
index 97b899c4597cb08cc501ed079a7cfddb1652e6ca..c65221b4625bd50e086d1ee03ba557a95b05e8f7 100644
--- a/vertical-pod-autoscaler/e2e/go.mod
+++ b/vertical-pod-autoscaler/e2e/go.mod
@@ -14,7 +14,7 @@ require (
 	k8s.io/apimachinery v0.32.0
 	k8s.io/autoscaler/vertical-pod-autoscaler v1.2.1
 	k8s.io/client-go v0.32.0
-	k8s.io/component-base v0.32.0
+	k8s.io/component-base v0.32.2
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/kubernetes v1.32.0
 	k8s.io/pod-security-admission v0.32.0
diff --git a/vertical-pod-autoscaler/e2e/go.sum b/vertical-pod-autoscaler/e2e/go.sum
index 61add5f6b0072e999786ca43c497d6416eae8260..0bd3a2391182d2b580196280c563bea6c90d4e3c 100644
--- a/vertical-pod-autoscaler/e2e/go.sum
+++ b/vertical-pod-autoscaler/e2e/go.sum
@@ -92,6 +92,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
 github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
+github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
+github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4=
diff --git a/vertical-pod-autoscaler/e2e/v1/actuation.go b/vertical-pod-autoscaler/e2e/v1/actuation.go
index 124d2c408226c35c2bf3b7e2534d39a3bf97251c..be4be3502c9a01f85a8521ddcebe3d8bc79bbf70 100644
--- a/vertical-pod-autoscaler/e2e/v1/actuation.go
+++ b/vertical-pod-autoscaler/e2e/v1/actuation.go
@@ -35,6 +35,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils"
 	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	restriction "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/restriction"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -50,6 +51,236 @@ import (
 	"github.com/onsi/gomega"
 )

+var _ = ActuationSuiteE2eDescribe("Actuation", ginkgo.Label("FG:InPlaceOrRecreate"), func() {
+	f := framework.NewDefaultFramework("vertical-pod-autoscaling")
+	f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline
+
+	ginkgo.BeforeEach(func() {
+		checkInPlaceOrRecreateTestsEnabled(f, true, true)
+	})
+
+	ginkgo.It("still applies recommendations on restart when update mode is InPlaceOrRecreate", func() {
+		ginkgo.By("Setting up a hamster deployment")
+		SetupHamsterDeployment(f, "100m", "100Mi", defaultHamsterReplicas)
+		podList, err := GetHamsterPods(f)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		podSet := MakePodSet(podList)
+
+		ginkgo.By("Setting up a VPA CRD in mode InPlaceOrRecreate")
+		containerName := GetHamsterContainerNameByIndex(0)
+		vpaCRD := test.VerticalPodAutoscaler().
+			WithName("hamster-vpa").
+			WithNamespace(f.Namespace.Name).
+			WithTargetRef(hamsterTargetRef).
+			WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate).
+			WithContainer(containerName).
+			AppendRecommendation(
+				test.Recommendation().
+					WithContainer(containerName).
+					WithTarget("200m", "").
+					WithLowerBound("200m", "").
+					WithUpperBound("200m", "").
+ GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + updatedCPURequest := ParseQuantityOrDie("200m") + + ginkgo.By(fmt.Sprintf("Waiting for pods to be evicted, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoPodsEvicted(f, podSet) + ginkgo.By("Forcefully killing one pod") + killPod(f, podList) + + ginkgo.By("Checking that request was modified after forceful restart") + updatedPodList, _ := GetHamsterPods(f) + var foundUpdated int32 + for _, pod := range updatedPodList.Items { + podRequest := getCPURequest(pod.Spec) + framework.Logf("podReq: %v", podRequest) + if podRequest.Cmp(updatedCPURequest) == 0 { + foundUpdated += 1 + } + } + gomega.Expect(foundUpdated).To(gomega.Equal(defaultHamsterReplicas)) + }) + + // TODO: add e2e test to verify metrics are getting updated + ginkgo.It("applies in-place updates to all containers when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment") + d := NewNHamstersDeployment(f, 2 /*number of containers*/) + d.Spec.Template.Spec.Containers[0].Resources.Requests = apiv1.ResourceList{ + apiv1.ResourceCPU: ParseQuantityOrDie("100m"), + apiv1.ResourceMemory: ParseQuantityOrDie("100Mi"), + } + d.Spec.Template.Spec.Containers[1].Resources.Requests = apiv1.ResourceList{ + apiv1.ResourceCPU: ParseQuantityOrDie("100m"), + apiv1.ResourceMemory: ParseQuantityOrDie("100Mi"), + } + targetCPU := "200m" + targetMemory := "200Mi" + _ = startDeploymentPods(f, d) // 3 replicas + container1Name := GetHamsterContainerNameByIndex(0) + container2Name := GetHamsterContainerNameByIndex(1) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(container1Name). + WithContainer(container2Name). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(container1Name). + WithTarget(targetCPU, targetMemory). + WithLowerBound(targetCPU, targetMemory). + WithUpperBound(targetCPU, targetMemory). + GetContainerResources()). + AppendRecommendation( + test.Recommendation(). + WithContainer(container2Name). + WithTarget(targetCPU, targetMemory). + WithLowerBound(targetCPU, targetMemory). + WithUpperBound(targetCPU, targetMemory). + GetContainerResources()). 
+			Get()
+
+		InstallVPA(f, vpaCRD)
+
+		ginkgo.By("Checking that resources were modified due to in-place update, not due to evictions")
+		err = WaitForPodsUpdatedWithoutEviction(f, podList)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+		ginkgo.By("Checking that container resources were actually updated")
+		gomega.Eventually(func() error {
+			updatedPodList, err := GetHamsterPods(f)
+			if err != nil {
+				return err
+			}
+			for _, pod := range updatedPodList.Items {
+				for _, container := range pod.Status.ContainerStatuses {
+					cpuRequest := container.Resources.Requests[apiv1.ResourceCPU]
+					memoryRequest := container.Resources.Requests[apiv1.ResourceMemory]
+					if cpuRequest.Cmp(ParseQuantityOrDie(targetCPU)) != 0 {
+						framework.Logf("%v/%v has not been updated to %v yet: currently=%v", pod.Name, container.Name, targetCPU, cpuRequest.String())
+						return fmt.Errorf("%s CPU request not updated", container.Name)
+					}
+					if memoryRequest.Cmp(ParseQuantityOrDie(targetMemory)) != 0 {
+						framework.Logf("%v/%v has not been updated to %v yet: currently=%v", pod.Name, container.Name, targetMemory, memoryRequest.String())
+						return fmt.Errorf("%s memory request not updated", container.Name)
+					}
+				}
+			}
+			return nil
+		}, VpaInPlaceTimeout*3, 15*time.Second).Should(gomega.Succeed())
+	})
+
+	ginkgo.It("falls back to evicting pods when in-place update is Infeasible when update mode is InPlaceOrRecreate", func() {
+		ginkgo.By("Setting up a hamster deployment")
+		replicas := int32(2)
+		SetupHamsterDeployment(f, "100m", "100Mi", replicas)
+		podList, err := GetHamsterPods(f)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+		ginkgo.By("Setting up a VPA CRD")
+		containerName := GetHamsterContainerNameByIndex(0)
+		updatedCPU := "999" // infeasible target
+		vpaCRD := test.VerticalPodAutoscaler().
+			WithName("hamster-vpa").
+			WithNamespace(f.Namespace.Name).
+			WithTargetRef(hamsterTargetRef).
+			WithContainer(containerName).
+			WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate).
+			AppendRecommendation(
+				test.Recommendation().
+					WithContainer(containerName).
+					WithTarget(updatedCPU, "").
+					WithLowerBound("200m", "").
+					WithUpperBound("200m", "").
+					GetContainerResources()).
+			Get()
+
+		InstallVPA(f, vpaCRD)
+
+		ginkgo.By("Waiting for pods to be evicted")
+		err = WaitForPodsEvicted(f, podList)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+	})
+
+	ginkgo.It("falls back to evicting pods when resize is Deferred and more than 5 minutes have elapsed since the last in-place update when update mode is InPlaceOrRecreate", func() {
+		ginkgo.By("Setting up a hamster deployment")
+		replicas := int32(2)
+		SetupHamsterDeployment(f, "100m", "100Mi", replicas)
+		podList, err := GetHamsterPods(f)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+		ginkgo.By("Setting up a VPA CRD")
+		containerName := GetHamsterContainerNameByIndex(0)
+
+		// We can force a deferred resize by setting the target CPU to the node's allocatable CPU:
+		// it is close enough to the node capacity that the kubelet defers the resize instead of marking it infeasible.
+		nodeName := podList.Items[0].Spec.NodeName
+		node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		allocatableCPU := node.Status.Allocatable[apiv1.ResourceCPU]
+		updatedCPU := allocatableCPU.String()
+
+		vpaCRD := test.VerticalPodAutoscaler().
+			WithName("hamster-vpa").
+			WithNamespace(f.Namespace.Name).
+			WithTargetRef(hamsterTargetRef).
+			WithContainer(containerName).
+ WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, ""). + WithLowerBound("200m", ""). + WithUpperBound("200m", ""). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Waiting for status to be Deferred") + gomega.Eventually(func() error { + updatedPodList, err := GetHamsterPods(f) + if err != nil { + return err + } + for _, pod := range updatedPodList.Items { + if pod.Status.Resize == apiv1.PodResizeStatusDeferred { + return nil + } + } + return fmt.Errorf("status not deferred") + }, VpaInPlaceTimeout, 5*time.Second).Should(gomega.Succeed()) + + ginkgo.By("Making sure pods are not evicted yet") + gomega.Consistently(func() error { + updatedPodList, err := GetHamsterPods(f) + if err != nil { + return fmt.Errorf("failed to get pods: %v", err) + } + for _, pod := range updatedPodList.Items { + request := getCPURequestFromStatus(pod.Status) + if request.Cmp(ParseQuantityOrDie(updatedCPU)) == 0 { + framework.Logf("%v/%v updated to %v, that wasn't supposed to happen this early", pod.Name, containerName, updatedCPU) + return fmt.Errorf("%s CPU request should not have been updated", containerName) + } + } + return nil + }, restriction.DeferredResizeUpdateTimeout, 10*time.Second).Should(gomega.Succeed()) + + ginkgo.By("Waiting for pods to be evicted") + err = WaitForPodsEvicted(f, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +}) + var _ = ActuationSuiteE2eDescribe("Actuation", func() { f := framework.NewDefaultFramework("vertical-pod-autoscaling") f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline @@ -519,6 +750,10 @@ func getCPURequest(podSpec apiv1.PodSpec) resource.Quantity { return podSpec.Containers[0].Resources.Requests[apiv1.ResourceCPU] } +func getCPURequestFromStatus(podStatus apiv1.PodStatus) resource.Quantity { + return podStatus.ContainerStatuses[0].Resources.Requests[apiv1.ResourceCPU] +} + func killPod(f *framework.Framework, podList *apiv1.PodList) { f.ClientSet.CoreV1().Pods(f.Namespace.Name).Delete(context.TODO(), podList.Items[0].Name, metav1.DeleteOptions{}) err := WaitForPodsRestarted(f, podList) diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go index e3d526b3f066acb76e931e966848c2eb567c39be..502a07b81868eff79d6f7a380358984141b58aa3 100644 --- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go +++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go @@ -37,10 +37,62 @@ import ( "github.com/onsi/gomega" ) +const ( + webhookConfigName = "vpa-webhook-config" + webhookName = "vpa.k8s.io" +) + +var _ = AdmissionControllerE2eDescribe("Admission-controller", ginkgo.Label("FG:InPlaceOrRecreate"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkInPlaceOrRecreateTestsEnabled(f, true, false) + waitForVpaWebhookRegistration(f) + }) + + ginkgo.It("starts pods with new recommended request with InPlaceOrRecreate mode", func() { + d := NewHamsterDeploymentWithResources(f, ParseQuantityOrDie("100m") /*cpu*/, ParseQuantityOrDie("100Mi") /*memory*/) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). 
+ WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("250m", "200Mi"). + WithLowerBound("250m", "200Mi"). + WithUpperBound("250m", "200Mi"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + podList := startDeploymentPods(f, d) + + // Originally Pods had 100m CPU, 100Mi of memory, but admission controller + // should change it to recommended 250m CPU and 200Mi of memory. + for _, pod := range podList.Items { + gomega.Expect(pod.Spec.Containers[0].Resources.Requests[apiv1.ResourceCPU]).To(gomega.Equal(ParseQuantityOrDie("250m"))) + gomega.Expect(pod.Spec.Containers[0].Resources.Requests[apiv1.ResourceMemory]).To(gomega.Equal(ParseQuantityOrDie("200Mi"))) + } + }) +}) + var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { f := framework.NewDefaultFramework("vertical-pod-autoscaling") f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + ginkgo.BeforeEach(func() { + waitForVpaWebhookRegistration(f) + }) + ginkgo.It("starts pods with new recommended request", func() { d := NewHamsterDeploymentWithResources(f, ParseQuantityOrDie("100m") /*cpu*/, ParseQuantityOrDie("100Mi") /*memory*/) @@ -907,7 +959,6 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { gomega.Expect(err).To(gomega.HaveOccurred(), "Invalid VPA object accepted") gomega.Expect(err.Error()).To(gomega.MatchRegexp(`.*admission webhook .*vpa.* denied the request: .*`), "Admission controller did not inspect the object") }) - }) func startDeploymentPods(f *framework.Framework, deployment *appsv1.Deployment) *apiv1.PodList { @@ -962,3 +1013,17 @@ func startDeploymentPods(f *framework.Framework, deployment *appsv1.Deployment) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "when listing pods after deployment resize") return podList } + +func waitForVpaWebhookRegistration(f *framework.Framework) { + ginkgo.By("Waiting for VPA webhook registration") + gomega.Eventually(func() bool { + webhook, err := f.ClientSet.AdmissionregistrationV1().MutatingWebhookConfigurations().Get(context.TODO(), webhookConfigName, metav1.GetOptions{}) + if err != nil { + return false + } + if webhook != nil && len(webhook.Webhooks) > 0 && webhook.Webhooks[0].Name == webhookName { + return true + } + return false + }, 3*time.Minute, 5*time.Second).Should(gomega.BeTrue(), "Webhook was not registered in the cluster") +} diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index d1c479df900797516a5b42ceab0e3c8c689bd014..80fb4db2046a62a29e08869817d6dbfa3d20205a 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "time" ginkgo "github.com/onsi/ginkgo/v2" @@ -36,6 +37,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment" @@ -53,9 +55,15 @@ const ( // VpaEvictionTimeout is a timeout for VPA to restart a pod if there are no // mechanisms blocking it (for example PDB). 
 	VpaEvictionTimeout = 3 * time.Minute
+	// VpaInPlaceTimeout is a timeout for the VPA to finish in-place resizing a
+	// pod, if there are no mechanisms blocking it.
+	VpaInPlaceTimeout = 2 * time.Minute

 	defaultHamsterReplicas     = int32(3)
 	defaultHamsterBackoffLimit = int32(10)
+
+	// VpaNamespace is the default namespace that holds all the VPA components.
+	VpaNamespace = "kube-system"
 )

 var hamsterTargetRef = &autoscaling.CrossVersionObjectReference{
@@ -67,38 +75,35 @@ var hamsterTargetRef = &autoscaling.CrossVersionObjectReference{
 var hamsterLabels = map[string]string{"app": "hamster"}

 // SIGDescribe adds sig-autoscaling tag to test description.
-func SIGDescribe(text string, body func()) bool {
-	return ginkgo.Describe(fmt.Sprintf("[sig-autoscaling] %v", text), body)
-}
-
-// E2eDescribe describes a VPA e2e test.
-func E2eDescribe(scenario, name string, body func()) bool {
-	return SIGDescribe(fmt.Sprintf("[VPA] [%s] [v1] %s", scenario, name), body)
+// Takes args that are passed to ginkgo.Describe.
+func SIGDescribe(scenario, name string, args ...interface{}) bool {
+	full := fmt.Sprintf("[sig-autoscaling] [VPA] [%s] [v1] %s", scenario, name)
+	return ginkgo.Describe(full, args...)
 }

 // RecommenderE2eDescribe describes a VPA recommender e2e test.
-func RecommenderE2eDescribe(name string, body func()) bool {
-	return E2eDescribe(recommenderComponent, name, body)
+func RecommenderE2eDescribe(name string, args ...interface{}) bool {
+	return SIGDescribe(recommenderComponent, name, args...)
 }

 // UpdaterE2eDescribe describes a VPA updater e2e test.
-func UpdaterE2eDescribe(name string, body func()) bool {
-	return E2eDescribe(updateComponent, name, body)
+func UpdaterE2eDescribe(name string, args ...interface{}) bool {
+	return SIGDescribe(updateComponent, name, args...)
 }

 // AdmissionControllerE2eDescribe describes a VPA admission controller e2e test.
-func AdmissionControllerE2eDescribe(name string, body func()) bool {
-	return E2eDescribe(admissionControllerComponent, name, body)
+func AdmissionControllerE2eDescribe(name string, args ...interface{}) bool {
+	return SIGDescribe(admissionControllerComponent, name, args...)
 }

 // FullVpaE2eDescribe describes a VPA full stack e2e test.
-func FullVpaE2eDescribe(name string, body func()) bool {
-	return E2eDescribe(fullVpaSuite, name, body)
+func FullVpaE2eDescribe(name string, args ...interface{}) bool {
+	return SIGDescribe(fullVpaSuite, name, args...)
 }

 // ActuationSuiteE2eDescribe describes a VPA actuation e2e test.
-func ActuationSuiteE2eDescribe(name string, body func()) bool {
-	return E2eDescribe(actuationSuite, name, body)
+func ActuationSuiteE2eDescribe(name string, args ...interface{}) bool {
+	return SIGDescribe(actuationSuite, name, args...)
 }

 // GetHamsterContainerNameByIndex returns name of i-th hamster container.
@@ -555,3 +560,108 @@ func InstallLimitRangeWithMin(f *framework.Framework, minCpuLimit, minMemoryLimi
 	minMemoryLimitQuantity := ParseQuantityOrDie(minMemoryLimit)
 	installLimitRange(f, &minCpuLimitQuantity, &minMemoryLimitQuantity, nil, nil, lrType)
 }
+
+// WaitForPodsUpdatedWithoutEviction waits for pods to be updated without any evictions taking place over the polling
+// interval.
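+// It returns an error if any of the initial pods disappears (i.e. is evicted) before a resource change is observed,
+// or if no resource change is observed within VpaInPlaceTimeout.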
+func WaitForPodsUpdatedWithoutEviction(f *framework.Framework, initialPods *apiv1.PodList) error {
+	framework.Logf("waiting for at least one pod to be updated without eviction")
+	err := wait.PollUntilContextTimeout(context.TODO(), pollInterval, VpaInPlaceTimeout, false, func(context.Context) (bool, error) {
+		podList, err := GetHamsterPods(f)
+		if err != nil {
+			return false, err
+		}
+		resourcesHaveDiffered := false
+		podMissing := false
+		for _, initialPod := range initialPods.Items {
+			found := false
+			for _, pod := range podList.Items {
+				if initialPod.Name == pod.Name {
+					found = true
+					for num, container := range pod.Status.ContainerStatuses {
+						for resourceName, resourceLimit := range container.Resources.Limits {
+							initialResourceLimit := initialPod.Status.ContainerStatuses[num].Resources.Limits[resourceName]
+							if !resourceLimit.Equal(initialResourceLimit) {
+								framework.Logf("%s/%s: %s limit status(%v) differs from initial limit spec(%v)", pod.Name, container.Name, resourceName, resourceLimit.String(), initialResourceLimit.String())
+								resourcesHaveDiffered = true
+							}
+						}
+						for resourceName, resourceRequest := range container.Resources.Requests {
+							initialResourceRequest := initialPod.Status.ContainerStatuses[num].Resources.Requests[resourceName]
+							if !resourceRequest.Equal(initialResourceRequest) {
+								framework.Logf("%s/%s: %s request status(%v) differs from initial request spec(%v)", pod.Name, container.Name, resourceName, resourceRequest.String(), initialResourceRequest.String())
+								resourcesHaveDiffered = true
+							}
+						}
+					}
+				}
+			}
+			if !found {
+				podMissing = true
+			}
+		}
+		if podMissing {
+			return true, fmt.Errorf("a pod was erroneously evicted")
+		}
+		if resourcesHaveDiffered {
+			framework.Logf("after checking %d pods, resources have started to differ for at least one of them", len(podList.Items))
+			return true, nil
+		}
+		return false, nil
+	})
+	framework.Logf("finished waiting for at least one pod to be updated without eviction")
+	return err
+}
+
+// checkInPlaceOrRecreateTestsEnabled checks that the feature gates required for the
+// InPlaceOrRecreate VPA feature are enabled in the cluster.
+// Use this in a "beforeEach" call before any suites that use the InPlaceOrRecreate feature gate.
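+// The checkAdmission and checkUpdater arguments select which VPA component deployments (admission controller,
+// updater) are additionally inspected for the InPlaceOrRecreate feature gate flag; the suite is skipped via
+// ginkgo.Skip if any required gate is not enabled.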
+func checkInPlaceOrRecreateTestsEnabled(f *framework.Framework, checkAdmission, checkUpdater bool) { + ginkgo.By("Checking InPlacePodVerticalScaling cluster feature gate is on") + + podList, err := f.ClientSet.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{ + LabelSelector: "component=kube-apiserver", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + apiServerPod := podList.Items[0] + gomega.Expect(apiServerPod.Spec.Containers).To(gomega.HaveLen(1)) + apiServerContainer := apiServerPod.Spec.Containers[0] + gomega.Expect(apiServerContainer.Name).To(gomega.Equal("kube-apiserver")) + if !anyContainsSubstring(apiServerContainer.Command, "InPlacePodVerticalScaling=true") { + ginkgo.Skip("Skipping suite: InPlacePodVerticalScaling feature gate is not enabled on the cluster level") + } + + if checkUpdater { + ginkgo.By("Checking InPlaceOrRecreate VPA feature gate is enabled for updater") + + deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-updater", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) + vpaUpdaterPod := deploy.Spec.Template.Spec.Containers[0] + gomega.Expect(vpaUpdaterPod.Name).To(gomega.Equal("updater")) + if !anyContainsSubstring(vpaUpdaterPod.Args, fmt.Sprintf("%s=true", string(features.InPlaceOrRecreate))) { + ginkgo.Skip("Skipping suite: InPlaceOrRecreate feature gate is not enabled for the VPA updater") + } + } + + if checkAdmission { + ginkgo.By("Checking InPlaceOrRecreate VPA feature gate is enabled for admission controller") + + deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-admission-controller", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) + vpaAdmissionPod := deploy.Spec.Template.Spec.Containers[0] + gomega.Expect(vpaAdmissionPod.Name).To(gomega.Equal("admission-controller")) + if !anyContainsSubstring(vpaAdmissionPod.Args, fmt.Sprintf("%s=true", string(features.InPlaceOrRecreate))) { + ginkgo.Skip("Skipping suite: InPlaceOrRecreate feature gate is not enabled for VPA admission controller") + } + } +} + +func anyContainsSubstring(arr []string, substr string) bool { + for _, s := range arr { + if strings.Contains(s, substr) { + return true + } + } + return false +} diff --git a/vertical-pod-autoscaler/e2e/v1/full_vpa.go b/vertical-pod-autoscaler/e2e/v1/full_vpa.go index f390ec5a7be666491b8b2b29dc77e83ae725c1e5..ec1467f58a53c174a5bb0d9923658e8be0fcb2e6 100644 --- a/vertical-pod-autoscaler/e2e/v1/full_vpa.go +++ b/vertical-pod-autoscaler/e2e/v1/full_vpa.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" podsecurity "k8s.io/pod-security-admission/api" @@ -60,73 +61,149 @@ var _ = FullVpaE2eDescribe("Pods under VPA", func() { f := framework.NewDefaultFramework("vertical-pod-autoscaling") f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline - ginkgo.BeforeEach(func() { - ns := f.Namespace.Name - ginkgo.By("Setting up a hamster deployment") - rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, - replicas, - 1, /*initCPUTotal*/ - 10, /*initMemoryTotal*/ - 1, /*initCustomMetric*/ 
- initialCPU, /*cpuRequest*/ - initialMemory, /*memRequest*/ - f.ClientSet, - f.ScalesGetter) + ginkgo.Describe("with InPlaceOrRecreate update mode", ginkgo.Label("FG:InPlaceOrRecreate"), func() { + ginkgo.BeforeEach(func() { + checkInPlaceOrRecreateTestsEnabled(f, true, false) + + ns := f.Namespace.Name + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + ginkgo.By("Setting up a VPA CRD") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } - ginkgo.By("Setting up a VPA CRD") - targetRef := &autoscaling.CrossVersionObjectReference{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: "hamster", - } + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("250m", "200Mi"). + WithLowerBound("250m", "200Mi"). + WithUpperBound("250m", "200Mi"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + }) - containerName := GetHamsterContainerNameByIndex(0) - vpaCRD := test.VerticalPodAutoscaler(). - WithName("hamster-vpa"). - WithNamespace(f.Namespace.Name). - WithTargetRef(targetRef). - WithContainer(containerName). - AppendRecommendation( - test.Recommendation(). - WithContainer(containerName). - WithTarget("250m", "200Mi"). - WithLowerBound("250m", "200Mi"). - WithUpperBound("250m", "200Mi"). - GetContainerResources()). 
- Get() + ginkgo.It("have cpu requests growing with usage", func() { + // initial CPU usage is low so a minimal recommendation is expected + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // consume more CPU to get a higher recommendation + rc.ConsumeCPU(600 * replicas) + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("500m"), ParseQuantityOrDie("1300m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) - InstallVPA(f, vpaCRD) + ginkgo.It("have memory requests growing with usage", func() { + // initial memory usage is low so a minimal recommendation is expected + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, + ParseQuantityOrDie(minimalMemoryLowerBound), ParseQuantityOrDie(minimalMemoryUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // consume more memory to get a higher recommendation + // NOTE: large range given due to unpredictability of actual memory usage + rc.ConsumeMem(1024 * replicas) + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, + ParseQuantityOrDie("900Mi"), ParseQuantityOrDie("4000Mi")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) - ginkgo.It("have cpu requests growing with usage", func() { - // initial CPU usage is low so a minimal recommendation is expected - err := waitForResourceRequestInRangeInPods( - f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, - ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.Describe("with Recreate updateMode", func() { + ginkgo.BeforeEach(func() { + ns := f.Namespace.Name + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + ginkgo.By("Setting up a VPA CRD") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } - // consume more CPU to get a higher recommendation - rc.ConsumeCPU(600 * replicas) - err = waitForResourceRequestInRangeInPods( - f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, - ParseQuantityOrDie("500m"), ParseQuantityOrDie("1300m")) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithContainer(containerName). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("250m", "200Mi"). + WithLowerBound("250m", "200Mi"). + WithUpperBound("250m", "200Mi"). + GetContainerResources()). 
+ Get() + + InstallVPA(f, vpaCRD) + }) - ginkgo.It("have memory requests growing with usage", func() { - // initial memory usage is low so a minimal recommendation is expected - err := waitForResourceRequestInRangeInPods( - f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, - ParseQuantityOrDie(minimalMemoryLowerBound), ParseQuantityOrDie(minimalMemoryUpperBound)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.It("have cpu requests growing with usage", func() { + // initial CPU usage is low so a minimal recommendation is expected + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // consume more CPU to get a higher recommendation + rc.ConsumeCPU(600 * replicas) + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("500m"), ParseQuantityOrDie("1300m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) - // consume more memory to get a higher recommendation - // NOTE: large range given due to unpredictability of actual memory usage - rc.ConsumeMem(1024 * replicas) - err = waitForResourceRequestInRangeInPods( - f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, - ParseQuantityOrDie("900Mi"), ParseQuantityOrDie("4000Mi")) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.It("have memory requests growing with usage", func() { + // initial memory usage is low so a minimal recommendation is expected + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, + ParseQuantityOrDie(minimalMemoryLowerBound), ParseQuantityOrDie(minimalMemoryUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // consume more memory to get a higher recommendation + // NOTE: large range given due to unpredictability of actual memory usage + rc.ConsumeMem(1024 * replicas) + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory, + ParseQuantityOrDie("900Mi"), ParseQuantityOrDie("4000Mi")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) }) diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go index ccf0f07ded1adc12e8543a65ec97b656a5db7e77..a72cdf6b1eba179a4225ad84452f062489b87ea3 100644 --- a/vertical-pod-autoscaler/e2e/v1/updater.go +++ b/vertical-pod-autoscaler/e2e/v1/updater.go @@ -140,6 +140,77 @@ var _ = UpdaterE2eDescribe("Updater", func() { }) }) +var _ = UpdaterE2eDescribe("Updater", ginkgo.Label("FG:InPlaceOrRecreate"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkInPlaceOrRecreateTestsEnabled(f, false, true) + }) + + ginkgo.It("In-place update pods when Admission Controller status available", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule 
a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). + Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForUpscalingInPlace(f) + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place updated") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Does not evict pods for downscaling in-place", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). + Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForDownscalingInPlace(f, nil) + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place downscaled") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +}) + func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList { return setupPodsForEviction(f, "100m", "100Mi", nil) } @@ -165,6 +236,7 @@ func setupPodsForEviction(f *framework.Framework, hamsterCPU, hamsterMemory stri WithName("hamster-vpa"). WithNamespace(f.Namespace.Name). WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeRecreate). WithEvictionRequirements(er). WithContainer(containerName). AppendRecommendation( @@ -180,3 +252,48 @@ func setupPodsForEviction(f *framework.Framework, hamsterCPU, hamsterMemory stri return podList } + +func setupPodsForUpscalingInPlace(f *framework.Framework) *apiv1.PodList { + return setupPodsForInPlace(f, "100m", "100Mi", nil, true) +} + +func setupPodsForDownscalingInPlace(f *framework.Framework, er []*vpa_types.EvictionRequirement) *apiv1.PodList { + return setupPodsForInPlace(f, "500m", "500Mi", er, true) +} + +func setupPodsForInPlace(f *framework.Framework, hamsterCPU, hamsterMemory string, er []*vpa_types.EvictionRequirement, withRecommendation bool) *apiv1.PodList { + controller := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster-deployment", + } + ginkgo.By(fmt.Sprintf("Setting up a hamster %v", controller.Kind)) + setupHamsterController(f, controller.Kind, hamsterCPU, hamsterMemory, defaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + vpaBuilder := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). 
+		WithEvictionRequirements(er).
+		WithContainer(containerName)
+
+	if withRecommendation {
+		vpaBuilder = vpaBuilder.AppendRecommendation(
+			test.Recommendation().
+				WithContainer(containerName).
+				WithTarget("200m", "200Mi").
+				WithLowerBound("200m", "200Mi").
+				WithUpperBound("200m", "200Mi").
+				GetContainerResources())
+	}
+
+	vpaCRD := vpaBuilder.Get()
+	InstallVPA(f, vpaCRD)
+
+	return podList
+}
diff --git a/vertical-pod-autoscaler/hack/run-e2e-tests.sh b/vertical-pod-autoscaler/hack/run-e2e-tests.sh
index e0970907a6d9416503f3f972047dfd0de8aa7cc0..f7f724925cba44aba111a7262378cc6392db54ce 100755
--- a/vertical-pod-autoscaler/hack/run-e2e-tests.sh
+++ b/vertical-pod-autoscaler/hack/run-e2e-tests.sh
@@ -50,7 +50,7 @@ case ${SUITE} in
   recommender|updater|admission-controller|actuation|full-vpa)
     export KUBECONFIG=$HOME/.kube/config
     pushd ${SCRIPT_ROOT}/e2e
-    go test ./v1/*go -v --test.timeout=90m --args --ginkgo.v=true --ginkgo.focus="\[VPA\] \[${SUITE}\]" --report-dir=${WORKSPACE} --disable-log-dump --ginkgo.timeout=90m
+    go test ./v1/*go -v --test.timeout=150m --args --ginkgo.v=true --ginkgo.focus="\[VPA\] \[${SUITE}\]" --report-dir=${WORKSPACE} --disable-log-dump --ginkgo.timeout=150m
     V1_RESULT=$?
     popd
     echo v1 test result: ${V1_RESULT}
diff --git a/vertical-pod-autoscaler/hack/vpa-process-yamls.sh b/vertical-pod-autoscaler/hack/vpa-process-yamls.sh
index acb4887eb5258ab73627ccebc34db1bba6ea473e..ddd87ed26681858920612950f2ff8becc8d5c0c2 100755
--- a/vertical-pod-autoscaler/hack/vpa-process-yamls.sh
+++ b/vertical-pod-autoscaler/hack/vpa-process-yamls.sh
@@ -70,7 +70,7 @@ for i in $COMPONENTS; do
     elif [ ${ACTION} == delete ] ; then
       (bash ${SCRIPT_ROOT}/pkg/admission-controller/rmcerts.sh || true)
       (bash ${SCRIPT_ROOT}/pkg/admission-controller/delete-webhook.sh || true)
-      kubectl delete -f ${SCRIPT_ROOT}/deploy/admission-controller-service.yaml
+      kubectl delete -f ${SCRIPT_ROOT}/deploy/admission-controller-service.yaml --ignore-not-found
     fi
   fi
   if [[ ${ACTION} == print ]]; then