From d2569dedb4b0e8b59fe6e5009c21317c223ff486 Mon Sep 17 00:00:00 2001 From: Connor Kuehl <connor.kuehl@suse.com> Date: Tue, 20 Jun 2023 13:27:57 -0500 Subject: [PATCH] Allow Plan to specify Job deadline If a workload is known to be slow in excess of the system-upgrade-controller deployment's default deadline, allow the Plan creator to specify a job deadline with the plan so that the entailing job may run to completion. Signed-off-by: Connor Kuehl <connor.kuehl@suse.com> --- pkg/apis/upgrade.cattle.io/v1/types.go | 7 ++- pkg/upgrade/job/job.go | 27 ++++++++- pkg/upgrade/job/job_suite_test.go | 78 ++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/pkg/apis/upgrade.cattle.io/v1/types.go b/pkg/apis/upgrade.cattle.io/v1/types.go index e0b53116..85883e11 100644 --- a/pkg/apis/upgrade.cattle.io/v1/types.go +++ b/pkg/apis/upgrade.cattle.io/v1/types.go @@ -33,9 +33,10 @@ type Plan struct { // PlanSpec represents the user-configurable details of a Plan. type PlanSpec struct { - Concurrency int64 `json:"concurrency,omitempty"` - NodeSelector *metav1.LabelSelector `json:"nodeSelector,omitempty"` - ServiceAccountName string `json:"serviceAccountName,omitempty"` + Concurrency int64 `json:"concurrency,omitempty"` + JobActiveDeadlineSecs int64 `json:"jobActiveDeadlineSecs,omitempty"` + NodeSelector *metav1.LabelSelector `json:"nodeSelector,omitempty"` + ServiceAccountName string `json:"serviceAccountName,omitempty"` Channel string `json:"channel,omitempty"` Version string `json:"version,omitempty"` diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index 3b3ff330..dca975bc 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -41,6 +41,17 @@ var ( return defaultValue }(defaultActiveDeadlineSeconds) + ActiveDeadlineSecondsMax = func(defaultValue int64) int64 { + if str, ok := os.LookupEnv("SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX"); ok { + if i, err := strconv.ParseInt(str, 10, 64); err != nil { + logrus.Errorf("failed to parse $%s: %v", "SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX", err) + } else { + return i + } + } + return defaultValue + }(0 /* no maximum */) + BackoffLimit = func(defaultValue int32) int32 { if str, ok := os.LookupEnv("SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT"); ok { if i, err := strconv.ParseInt(str, 10, 32); err != nil { @@ -327,8 +338,20 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat ), } - if ActiveDeadlineSeconds > 0 { - job.Spec.ActiveDeadlineSeconds = &ActiveDeadlineSeconds + activeDeadlineSeconds := ActiveDeadlineSeconds + + if plan.Spec.JobActiveDeadlineSecs > 0 { + activeDeadlineSeconds = plan.Spec.JobActiveDeadlineSecs + } + + // If configured with a maximum deadline via "SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX", + // clamp the Plan's given deadline to the maximum. + if ActiveDeadlineSecondsMax > 0 && activeDeadlineSeconds > ActiveDeadlineSecondsMax { + activeDeadlineSeconds = ActiveDeadlineSecondsMax + } + + if activeDeadlineSeconds > 0 { + job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds if drain != nil && drain.Timeout != nil && drain.Timeout.Milliseconds() > ActiveDeadlineSeconds*1000 { logrus.Warnf("drain timeout exceeds active deadline seconds") } diff --git a/pkg/upgrade/job/job_suite_test.go b/pkg/upgrade/job/job_suite_test.go index 8ec2e6ed..183fc2c6 100644 --- a/pkg/upgrade/job/job_suite_test.go +++ b/pkg/upgrade/job/job_suite_test.go @@ -5,9 +5,87 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + + upgradev1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1" + sucjob "github.com/rancher/system-upgrade-controller/pkg/upgrade/job" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestJob(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Job Suite") } + +var _ = Describe("Jobs", func() { + var plan *upgradev1.Plan + var node *corev1.Node + + BeforeEach(func() { + plan = &upgradev1.Plan{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "default", + }, + Spec: upgradev1.PlanSpec{ + Concurrency: 1, + ServiceAccountName: "system-upgrade-controller-foo", + Upgrade: &upgradev1.ContainerSpec{ + Image: "test-image:latest", + }, + }, + } + + node = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "prod.test.local", + }, + } + }) + + Describe("Setting the batchv1.Job ActiveDeadlineSeconds field", func() { + Context("When the Plan has a positive non-zero value for deadline", func() { + It("Constructs the batchv1.Job with the Plan's given value", func() { + plan.Spec.JobActiveDeadlineSecs = 12345 + job := sucjob.New(plan, node, "foo") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(12345))) + }) + }) + + Context("When the Plan has a zero-value given as its deadline", func() { + It("Constructs the batchv1.Job with a global default", func() { + oldActiveDeadlineSeconds := sucjob.ActiveDeadlineSeconds + sucjob.ActiveDeadlineSeconds = 300 + defer func() { sucjob.ActiveDeadlineSeconds = oldActiveDeadlineSeconds }() + + plan.Spec.JobActiveDeadlineSecs = 0 + job := sucjob.New(plan, node, "bar") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(300))) + }) + }) + + Context("When the Plan has a negative value given as its deadline", func() { + It("Constructs the batchv1.Job with a global default", func() { + oldActiveDeadlineSeconds := sucjob.ActiveDeadlineSeconds + sucjob.ActiveDeadlineSeconds = 3600 + defer func() { sucjob.ActiveDeadlineSeconds = oldActiveDeadlineSeconds }() + + plan.Spec.JobActiveDeadlineSecs = -1 + job := sucjob.New(plan, node, "baz") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(3600))) + }) + }) + + Context("When cluster has a maximum deadline and the Plan deadline exceeds that value", func() { + It("Constructs the batchv1.Job with the cluster's maximum deadline value", func() { + oldActiveDeadlineSecondsMax := sucjob.ActiveDeadlineSecondsMax + sucjob.ActiveDeadlineSecondsMax = 300 + defer func() { sucjob.ActiveDeadlineSecondsMax = oldActiveDeadlineSecondsMax }() + + plan.Spec.JobActiveDeadlineSecs = 600 + job := sucjob.New(plan, node, "foobar") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(300))) + }) + }) + }) +}) -- GitLab