diff --git a/pkg/apis/upgrade.cattle.io/v1/types.go b/pkg/apis/upgrade.cattle.io/v1/types.go index e0b53116a225a5c059cf34da28a37b451dc82c75..85883e11d83d80bc25c0fc08998a0f63d12cc8d7 100644 --- a/pkg/apis/upgrade.cattle.io/v1/types.go +++ b/pkg/apis/upgrade.cattle.io/v1/types.go @@ -33,9 +33,10 @@ type Plan struct { // PlanSpec represents the user-configurable details of a Plan. type PlanSpec struct { - Concurrency int64 `json:"concurrency,omitempty"` - NodeSelector *metav1.LabelSelector `json:"nodeSelector,omitempty"` - ServiceAccountName string `json:"serviceAccountName,omitempty"` + Concurrency int64 `json:"concurrency,omitempty"` + JobActiveDeadlineSecs int64 `json:"jobActiveDeadlineSecs,omitempty"` + NodeSelector *metav1.LabelSelector `json:"nodeSelector,omitempty"` + ServiceAccountName string `json:"serviceAccountName,omitempty"` Channel string `json:"channel,omitempty"` Version string `json:"version,omitempty"` diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index 3b3ff330675fb1acbc1e6b65a6b7a64898faee8e..dca975bc7e04f1ec9d91d7daeac93b21e8318d88 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -41,6 +41,17 @@ var ( return defaultValue }(defaultActiveDeadlineSeconds) + ActiveDeadlineSecondsMax = func(defaultValue int64) int64 { + if str, ok := os.LookupEnv("SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX"); ok { + if i, err := strconv.ParseInt(str, 10, 64); err != nil { + logrus.Errorf("failed to parse $%s: %v", "SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX", err) + } else { + return i + } + } + return defaultValue + }(0 /* no maximum */) + BackoffLimit = func(defaultValue int32) int32 { if str, ok := os.LookupEnv("SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT"); ok { if i, err := strconv.ParseInt(str, 10, 32); err != nil { @@ -327,8 +338,20 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat ), } - if ActiveDeadlineSeconds > 0 { - job.Spec.ActiveDeadlineSeconds = &ActiveDeadlineSeconds + activeDeadlineSeconds := ActiveDeadlineSeconds + + if plan.Spec.JobActiveDeadlineSecs > 0 { + activeDeadlineSeconds = plan.Spec.JobActiveDeadlineSecs + } + + // If configured with a maximum deadline via "SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS_MAX", + // clamp the Plan's given deadline to the maximum. + if ActiveDeadlineSecondsMax > 0 && activeDeadlineSeconds > ActiveDeadlineSecondsMax { + activeDeadlineSeconds = ActiveDeadlineSecondsMax + } + + if activeDeadlineSeconds > 0 { + job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds if drain != nil && drain.Timeout != nil && drain.Timeout.Milliseconds() > ActiveDeadlineSeconds*1000 { logrus.Warnf("drain timeout exceeds active deadline seconds") } diff --git a/pkg/upgrade/job/job_suite_test.go b/pkg/upgrade/job/job_suite_test.go index 8ec2e6ed7995aa49754d2decf025b3faa287717e..183fc2c64e190aad5285073bbaf30a2f944ceff3 100644 --- a/pkg/upgrade/job/job_suite_test.go +++ b/pkg/upgrade/job/job_suite_test.go @@ -5,9 +5,87 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + + upgradev1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1" + sucjob "github.com/rancher/system-upgrade-controller/pkg/upgrade/job" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestJob(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Job Suite") } + +var _ = Describe("Jobs", func() { + var plan *upgradev1.Plan + var node *corev1.Node + + BeforeEach(func() { + plan = &upgradev1.Plan{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "default", + }, + Spec: upgradev1.PlanSpec{ + Concurrency: 1, + ServiceAccountName: "system-upgrade-controller-foo", + Upgrade: &upgradev1.ContainerSpec{ + Image: "test-image:latest", + }, + }, + } + + node = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "prod.test.local", + }, + } + }) + + Describe("Setting the batchv1.Job ActiveDeadlineSeconds field", func() { + Context("When the Plan has a positive non-zero value for deadline", func() { + It("Constructs the batchv1.Job with the Plan's given value", func() { + plan.Spec.JobActiveDeadlineSecs = 12345 + job := sucjob.New(plan, node, "foo") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(12345))) + }) + }) + + Context("When the Plan has a zero-value given as its deadline", func() { + It("Constructs the batchv1.Job with a global default", func() { + oldActiveDeadlineSeconds := sucjob.ActiveDeadlineSeconds + sucjob.ActiveDeadlineSeconds = 300 + defer func() { sucjob.ActiveDeadlineSeconds = oldActiveDeadlineSeconds }() + + plan.Spec.JobActiveDeadlineSecs = 0 + job := sucjob.New(plan, node, "bar") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(300))) + }) + }) + + Context("When the Plan has a negative value given as its deadline", func() { + It("Constructs the batchv1.Job with a global default", func() { + oldActiveDeadlineSeconds := sucjob.ActiveDeadlineSeconds + sucjob.ActiveDeadlineSeconds = 3600 + defer func() { sucjob.ActiveDeadlineSeconds = oldActiveDeadlineSeconds }() + + plan.Spec.JobActiveDeadlineSecs = -1 + job := sucjob.New(plan, node, "baz") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(3600))) + }) + }) + + Context("When cluster has a maximum deadline and the Plan deadline exceeds that value", func() { + It("Constructs the batchv1.Job with the cluster's maximum deadline value", func() { + oldActiveDeadlineSecondsMax := sucjob.ActiveDeadlineSecondsMax + sucjob.ActiveDeadlineSecondsMax = 300 + defer func() { sucjob.ActiveDeadlineSecondsMax = oldActiveDeadlineSecondsMax }() + + plan.Spec.JobActiveDeadlineSecs = 600 + job := sucjob.New(plan, node, "foobar") + Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(int64(300))) + }) + }) + }) +})