diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 4c5f7a7bf1b0e0c92cd03c6c78c2230a49e526d2..3fbc6d8a49a1ec1c7df78cee2676efa428ec1f0a 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -263,6 +263,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index e304aa340d4a3d39b55e4db7159ff32b5ea77807..4b88d223743fe763a3145aa5a604ecc81bacf1ce 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -202,6 +202,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index bf44b765ffc72a2748e102e352d29dffe24b2d3e..6951fdd0d16d4885515a480d6fdade2f9d63b5cc 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -213,6 +213,19 @@ configKubernetes: # whether the Spilo container should run with additional permissions other than parent. # required by cron which needs setuid spilo_allow_privilege_escalation: true + + # liveness probe for the spilo pod + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 + # storage resize strategy, available options are: ebs, pvc, off or mixed storage_resize_mode: pvc # pod toleration assigned to instances of every Postgres cluster diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 0f4f0bab6823a95b3851b4f21a4afa4bbc105994..c0f951c87c09c84e76ad55c6be2de29690859d94 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -85,6 +85,10 @@ These parameters are grouped directly under the `spec` key in the manifest. requires a custom Spilo image. Note the FSGroup of a Pod cannot be changed without recreating a new Pod. Optional. +* **livenessProbe** + Allows for adding a liveness probe to the Spilo container to detect if it's + running properly. + * **enableMasterLoadBalancer** boolean flag to override the operator defaults (set by the `enable_master_load_balancer` parameter) to define whether to enable the load diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 861ea842e1761bcaccac69755a2edae10209fe76..675307364de6fd4d994a27deb0deb503b88da041 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -482,6 +482,10 @@ configuration they are grouped under the `kubernetes` key. process. Required by cron which needs setuid. Without this parameter, certification rotation & backups will not be done. The default is `true`. +* **liveness_probe** + Allows for adding a liveness probe to the Spilo container to detect if it's + running properly. + * **additional_pod_capabilities** list of additional capabilities to be added to the postgres container's SecurityContext (e.g. SYS_NICE etc.). Please, make sure first that the diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 390cf10462cb54fec37d56b74222045d58125893..98b63c26fe50e666d3877c532425a5fefcc03647 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -154,6 +154,16 @@ data: # spilo_runasgroup: 103 # spilo_fsgroup: 103 spilo_privileged: "false" + # liveness_probe: |- + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 storage_resize_mode: "pvc" super_username: postgres # target_major_version: "15" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index d0b30ef3703e2a0060c625dbd128a4b476447806..7767ac902cef312c62972a8c4b44e4f6b99a8561 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -258,6 +258,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index d2bb996446dd3dbe8e16142e2cd058f2d41de1dc..5efc3131113eb8fef5ad08ae7ddc70057ad23888 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -78,6 +78,16 @@ configuration: # inherited_labels: # - application # - environment + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 master_pod_move_timeout: 20m # node_readiness_label: # status: ready diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index f0174c80f2097c78d32cb81c6295d895cfb24ff1..d8321c6f63df9253f41a66a4182ea513c3a3ca79 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -200,6 +200,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 0c66b4b123daad2a93b22a9d3ce803a55bcb05ff..1cda575a59d7f4be482aaab6c93dc0b5847650d9 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -364,6 +364,132 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "livenessProbe": { + Description: "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "nodeAffinity": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ @@ -1383,6 +1509,132 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "liveness_probe": { + Description: "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "master_pod_move_timeout": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 69d591e425636112f36e1a56dbbf7cdb159a9f9f..c805d5ba89ab6affca6c421fd5121467901691eb 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -105,6 +105,7 @@ type KubernetesMetaConfiguration struct { EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` EnableFinalizers *bool `json:"enable_finalizers,omitempty"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` } // PostgresPodResourcesDefaults defines the spec of default resources diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index edce944f12505ea93c42c8c2d404e8aa55f4007c..a1a4c33997efe54f9e332da92e5f45d31a3567c8 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -87,6 +87,7 @@ type PostgresSpec struct { AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` Streams []Stream `json:"streams,omitempty"` Env []v1.EnvVar `json:"env,omitempty"` + LivenessProbe *v1.Probe `json:"livenessProbe,omitempty"` // deprecated json tags InitContainersOld []v1.Container `json:"init_containers,omitempty"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 69ce4fc9a40e7e637e91438b89987bdaec6e0fa7..821c701e78218c2c288b795f3777174bf3584568 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -277,6 +277,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = new(bool) **out = **in } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } return } @@ -849,6 +854,11 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } if in.InitContainersOld != nil { in, out := &in.InitContainersOld, &out.InitContainersOld *out = make([]corev1.Container, len(*in)) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b00d4b6b7d1840582ebceec6bf1a9ec633a1ff06..08dc4bb07957fbbaa206adca54d500cff77a4932 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -580,6 +580,10 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe func(a, b v1.Container) bool { return !reflect.DeepEqual(a.SecurityContext, b.SecurityContext) }), newCheck("new statefulset %s's %s (index %d) volume mounts do not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.VolumeMounts, b.VolumeMounts) }), + newCheck("new statefulset %s's %s (index %d) readiness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), + newCheck("new statefulset %s's %s (index %d) liveness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.LivenessProbe, b.LivenessProbe) }), } if !c.OpConfig.EnableLazySpiloUpgrade { diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index b60c4ecb3fffdcc2effaa284cb3ba730d68c561a..5c3ca128c0d679f1884324cf3a863bd0666f1ca7 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1244,6 +1244,19 @@ func generateSpiloReadinessProbe() *v1.Probe { } } +func generateSpiloLivenessProbe(probe, defaultProbe *v1.Probe) *v1.Probe { + + if probe != nil { + return probe + } + + if defaultProbe != nil { + return defaultProbe + } + + return nil +} + func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.StatefulSet, error) { var ( @@ -1363,6 +1376,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() } + spiloContainer.LivenessProbe = generateSpiloLivenessProbe(spec.LivenessProbe, c.OpConfig.LivenessProbe) + // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} if spec.Sidecars != nil && len(spec.Sidecars) > 0 { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 7c964e292bca1b6957a63de20212525eeba59cd7..f66dc3d7b994d360cc82c042438ddbdb3772a8e4 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -128,6 +128,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") result.PodAntiAffinityPreferredDuringScheduling = fromCRD.Kubernetes.PodAntiAffinityPreferredDuringScheduling result.PodToleration = fromCRD.Kubernetes.PodToleration + result.LivenessProbe = fromCRD.Kubernetes.LivenessProbe // Postgres Pod resources result.DefaultCPURequest = util.Coalesce(fromCRD.PostgresPodResources.DefaultCPURequest, "100m") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index c0fda940b08c8badf56601f117b37ad055fe5565..4f3ae6c08c2368142227de76568f3bb7e2f61222 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -61,6 +61,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"` ShmVolume *bool `name:"enable_shm_volume" default:"true"` + LivenessProbe *v1.Probe `name:"liveness_probe"` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` @@ -248,6 +249,7 @@ type Config struct { TargetMajorVersion string `name:"target_major_version" default:"15"` PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"` PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` EnablePatroniFailsafeMode *bool `name:"enable_patroni_failsafe_mode" default:"false"` PersistentVolumeClaimRetentionPolicy map[string]string `name:"persistent_volume_claim_retention_policy" default:"when_deleted:retain,when_scaled:retain"` }