From f0e7f92ef1d98b0c541572bbafa347be44214dad Mon Sep 17 00:00:00 2001 From: Soule BA Date: Fri, 5 Jan 2024 23:43:24 +0100 Subject: [PATCH 1/8] Add RFC - Custom Health Checks for Kustomization using Common Expression Language(CEL) Signed-off-by: Soule BA --- rfcs/0000-custom-health-checks/README.md | 330 +++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 rfcs/0000-custom-health-checks/README.md diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md new file mode 100644 index 00000000..aa3249d6 --- /dev/null +++ b/rfcs/0000-custom-health-checks/README.md @@ -0,0 +1,330 @@ +# RFC-0000 Custom Health Checks for Kustomization using Common Expression Language(CEL) + +**Status:** provisional + +**Creation date:** 2024-01-05 + +**Last update:** 2024-01-05 + +## Summary + +This RFC proposes to support customization of the status readers in `Kustomizations` +during the `healthCheck` phase for custom resources. The user will be able to declare +the needed `conditions` in order to compute a custom resource status. +In order to provide flexibility, we propose to use `CEL` expressions to declare +the expected conditions and their status. +This will introduce a new field `customHealthChecks` in the `Kustomization` CRD +which will be a list of `CustomHealthCheck` objects. + +## Motivation + +Flux uses the `Kstatus` library during the `healthCheck` phase to compute owned +resources status. This works just fine for all standard resources and custom resources +that comply with `Kstatus` interfaces. + +In the current Kustomization implementation, we have addressed such a problem for +kubernetes Jobs. We have implemented a `customJobStatusReader` that computes the +status of a Job based on a defined set of conditions. This is a good solution for +Jobs, but it is not generic and thus not applicable to other custom resources. 
+ +Another use case is relying on non-standard `conditions` to compute the status of +a custom resource. For example, we might want to compute the status of a custom +resource based on a condtion other then `Ready`. This is the case for `Resources` +that do intermediate patching like `Certificate` where you should look at the `Issued` +condition to know if the certificate has been issued or not before looking at the +`Ready` condition. + +In order to provide a generic solution for custom resources, that would not imply +writing a custom status reader for each new custom resource, we need to provide a +way for the user to express the `conditions` that need to be met in order to compute +the status of a given custom resource. And we need to do this in a way that is +flexible enough to cover all possible use cases, without having to change `Flux` +source code for each new use case. + +### Goals + +- provide a generic solution for user to customize the health check of custom resources +- support non-standard resources in `kustomize-controller` + +### Non-Goals + +- We do not plan to support custom `healthChecks` for core resources. + +## Proposal + +### Introduce a new field `CustomHealthChecksExprs` in the `Kustomization` CRD + +The `CustomHealthChecksExprs` field will be a list of `CustomHealthCheck` objects. +Each `CustomHealthChecksExprs` object will have a `apiVersion`, `kind`, `inProgress`, +`failed` and `current` fields. 
+ +To give an example, here is how we would declare a custom health check for a `Certificate` +resource: + +```yaml +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: app-certificate + namespace: cert-manager +spec: + commonName: cert-manager-tls + dnsNames: + - app.ns.svc.cluster.local + ipAddresses: + - x.x.x.x + isCA: true + issuerRef: + group: cert-manager.io + kind: ClusterIssuer + name: app-issuer + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + secretName: app-tls-certs + subject: + organizations: + - example.com +``` + +This `Certificate` resource will transition through the following `conditions`: +`Issuing` and `Ready`. + +In order to compute the status of this resource, we need to look at both the `Issuing` +and `Ready` conditions. + +The resulting `Kustomization` object will look like this: + +```yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1beta1 +kind: Kustomization +metadata: + name: application-kustomization +spec: + force: false + interval: 5m0s + path: ./overlays/application + prune: false + sourceRef: + kind: GitRepository + name: application-git + healthChecks: + - apiVersion: cert-manager.io/v1 + kind: Certificate + name: service-certificate + namespace: cert-manager + - apiVersion: apps/v1 + kind: Deployment + name: app + namespace: app + customHealthChecksExprs: + - apiVersion: cert-manager.io/v1 + kind: Certificate + inProgress: "status.conditions.filter(e, e.type == 'Issuing').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" + failed: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'False')" + current: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" +``` + +The `HealthChecks` field still contains the objects that should be included in +the health assessment. 
The `CustomHealthChecksExprs` field will be used to declare +the `conditions` that need to be met in order to compute the status of the custom resource. + +Note that all core resources are discarded from the `CustomHealthChecksExprs` field. + + +#### Provide an evaluator for `CEL` expressions for users + +We will provide a CEL environment that can be used by the user to evaluate `CEL` +expressions. Users will use it to test their expressions before applying them to +their `Kustomization` object. + +```shell +$ flux eval --api-version cert-manager.io/v1 --kind Certificate --in-progress "status.conditions.filter(e, e.type == 'Issuing').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" --failed "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'False')" --current "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" --file ./custom_resource.yaml +``` + +### User Stories + +#### Configure custom health checks for a custom resource + +> As a user of Flux, I want to be able to specify custom health checks for my +> custom resources, so that I can have more control over the status of my +> resources. + +#### Enable health checks support in Flux for non-standard resources + +> As a user of Flux, I want to be able to use the health check feature for +> non-standard resources, so that I can have more control over the status of my +> resources. + +### Alternatives + +We need an expression language that is flexible enough to cover all possible use +cases, without having to change `Flux` source code for each new use case. + +On alternative that have been considered is to use `cuelang` instead of `CEL`. +`cuelang` is a more powerful expression language, but it is also more complex and +requires more work to integrate with `Flux`. 
it also does not have any support in +`Kubernetes` yet while `CEL` is already used in `Kubernetes` and libraries are +available to use it. + +## Design Details + +### Introduce a new field `CustomHealthChecksExprs` in the `Kustomization` CRD + +The `api/v1/kustomization_types.go` file will be updated to add the `CustomHealthChecksExprs` +field to the `KustomizationSpec` struct. + +```go +type KustomizationSpec struct { +... + // A list of resources to be included in the health assessment. + // +optional + HealthChecks []meta.NamespacedObjectKindReference `json:"healthChecks,omitempty"` + + // A list of custom health checks expressed as CEL expressions. + // The CEL expression must evaluate to a boolean value. + // +optional + CustomHealthChecksExprs []CustomHealthCheckExprs `json:"customHealthChecksExprs,omitempty"` +... +} + +// CustomHealthCheckExprs defines the CEL expressions for custom health checks. +// The CEL expressions must evaluate to a boolean value. The expressions are used +// to determine the status of the custom resource. +type CustomHealthCheckExprs struct { + // apiVersion of the custom health check. + // +required + APIVersion string `json:"apiVersion"` + // Kind of the custom health check. + // +required + Kind string `json:"kind"` + // InProgress is the CEL expression that verifies that the status + // of the custom resource is in progress. + // +optional + InProgress string `json:"inProgress"` + // Failed is the CEL expression that verifies that the status + // of the custom resource is failed. + // +optional + Failed string `json:"failed"` + // Current is the CEL expression that verifies that the status + // of the custom resource is ready. + // +optional + Current string `json:"current"` +} +``` + +### Introduce a generic custom status reader + +Introduce a generic custom status reader that will be able to compute the status of +a custom resource based on a list of `conditions` that need to be met. 
+ +```go +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/cli-utils/pkg/kstatus/polling/engine" + "sigs.k8s.io/cli-utils/pkg/kstatus/polling/event" + kstatusreaders "sigs.k8s.io/cli-utils/pkg/kstatus/polling/statusreaders" +) +type customGenericStatusReader struct { + genericStatusReader engine.StatusReader + gvk schema.GroupVersionKind +} + +func NewCustomGenericStatusReader(mapper meta.RESTMapper, gvk schema.GroupVersionKind, exprs map[string]string) engine.StatusReader { + genericStatusReader := kstatusreaders.NewGenericStatusReader(mapper, genericConditions(gvk.Kind, exprs)) + return &customJobStatusReader{ + genericStatusReader: genericStatusReader, + gvk: gvk, + } +} + +func (g *customGenericStatusReader) Supports(gk schema.GroupKind) bool { + return gk == g.gvk.GroupKind() +} + +func (g *customGenericStatusReader) ReadStatus(ctx context.Context, reader engine.ClusterReader, resource object.ObjMetadata) (*event.ResourceStatus, error) { + return g.genericStatusReader.ReadStatus(ctx, reader, resource) +} + +func (g *customGenericStatusReader) ReadStatusForObject(ctx context.Context, reader engine.ClusterReader, resource *unstructured.Unstructured) (*event.ResourceStatus, error) { + return g.genericStatusReader.ReadStatusForObject(ctx, reader, resource) +} +``` + +A `genericConditions` closure will takes a `kind` and a map of `CEL` expressions as parameters +and returns a function that takes an `Unstructured` object and returns a `status.Result` object. 
+ +````go +import ( + "sigs.k8s.io/cli-utils/pkg/kstatus/status" + "github.com/fluxcd/pkg/runtime/cel" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func genericConditions(kind string, exprs map[string]string) func(u *unstructured.Unstructured) (*status.Result, error) { + return func(u *unstructured.Unstructured) (*status.Result, error) { + obj := u.UnstructuredContent() + + for statusKey, expr := range exprs { + // Use CEL to evaluate the expression + result, err := cel.ProcessExpr(expr, obj) + if err != nil { + return nil, err + } + switch statusKey { + case status.CurrentStatus.String(): + // If the expression evaluates to true, we return the current status + case status.FailedStatus.String(): + // If the expression evaluates to true, we return the failed status + case status.InProgressStatus.String(): + // If the expression evaluates to true, we return the reconciling status + } + } + } +} +```` + +The generic status reader will be used by the `statusPoller` provided to the `reconciler` +to compute the status of the resources for the registered custom resources `kind`. + +We will provide a `CEL` environment that will use the Kubernetes CEL library to +evaluate the `CEL` expressions. + +### StatusPoller configuration + +The `reconciler` holds a `statusPoller` that is used to compute the status of the +resources during the `healthCheck` phase of the reconciliation. The `statusPoller` +is configured with a list of `statusReaders` that are used to compute the status +of the resources. + +The `statusPoller` is not configurable once instantiated. This means +that we cannot add new `statusReaders` to the `statusPoller` once it is created. +This is a problem for custom resources because we need to be able to add new +`statusReaders` for each new custom resource that is declared in the `Kustomization` +object's `customHealthChecksExprs` field. 
Fortunately, the `cli-utils` library has +been forked in the `fluxcd` organization and we can make a change to the `statusPoller` +exposed the `statusReaders` field so that we can add new `statusReaders` to it. + + +The `statusPoller` used by `kustomize-controller` will be updated for every reconciliation +in order to add new polling options for custom resources that have a `CustomHealthChecksExprs` +field defined in their `Kustomization` object. + +### K8s CEL Library + +The `K8s CEL Library` is a library that provides `CEL` functions to help in evaluating +`CEL` expressions on `Kubernetes` objects. + +Unfortunately, this means that we will need to follow the `K8s CEL Library` releases +in order to make sure that we are using the same version of the `CEL` library as +`Kubernetes`. As of the time of writing this RFC, the `K8s CEL Library` is using the +`v0.16.1` version of the `CEL` library while the latest version of the `CEL` library +is `v0.18.2`. This means that we will need to use the `v0.16.1` version of the `CEL` +library in order to be able to use the `K8s CEL Library`. 
+ + +## Implementation History + +See current POC implementation under https://github.com/souleb/kustomize-controller/tree/cel-based-custom-health From 33465429176ed21912b112f96cf4d29517c58088 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Fri, 17 Jan 2025 11:05:29 +0200 Subject: [PATCH 2/8] Rework the custom health check spec Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 270 +++++++++-------------- 1 file changed, 105 insertions(+), 165 deletions(-) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index aa3249d6..0942b835 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -4,42 +4,37 @@ **Creation date:** 2024-01-05 -**Last update:** 2024-01-05 +**Last update:** 2025-01-17 ## Summary -This RFC proposes to support customization of the status readers in `Kustomizations` -during the `healthCheck` phase for custom resources. The user will be able to declare -the needed `conditions` in order to compute a custom resource status. -In order to provide flexibility, we propose to use `CEL` expressions to declare -the expected conditions and their status. -This will introduce a new field `customHealthChecks` in the `Kustomization` CRD -which will be a list of `CustomHealthCheck` objects. +This RFC proposes to extend the Flux `Kustomization` API with custom health checks for +custom resources using the Common Expression Language (CEL). -## Motivation +In order to provide flexibility, we propose to use CEL expressions for defining the +conditions that need to be met in order to determine the status of a custom resource. +We will introduce a new field called `healthCheckExprs` in the `Kustomization` CRD +which will be a list of CEL expressions for evaluating the status of a particular +Kubernetes resource kind. -Flux uses the `Kstatus` library during the `healthCheck` phase to compute owned -resources status. 
This works just fine for all standard resources and custom resources -that comply with `Kstatus` interfaces. +## Motivation -In the current Kustomization implementation, we have addressed such a problem for -kubernetes Jobs. We have implemented a `customJobStatusReader` that computes the -status of a Job based on a defined set of conditions. This is a good solution for -Jobs, but it is not generic and thus not applicable to other custom resources. +Flux uses the `kstatus` library during the health check phase to compute owned +resources status. This works just fine for all the Kubernetes core resources +and custom resources that comply with the `kstatus` conventions. -Another use case is relying on non-standard `conditions` to compute the status of -a custom resource. For example, we might want to compute the status of a custom -resource based on a condtion other then `Ready`. This is the case for `Resources` +There are cases where the status of a custom resource does not follow the +`kstatus` conventions. For example, we might want to compute the status of a custom +resource based on a condition other than `Ready`. This is the case for resources that do intermediate patching like `Certificate` where you should look at the `Issued` condition to know if the certificate has been issued or not before looking at the `Ready` condition. In order to provide a generic solution for custom resources, that would not imply -writing a custom status reader for each new custom resource, we need to provide a -way for the user to express the `conditions` that need to be met in order to compute -the status of a given custom resource. And we need to do this in a way that is -flexible enough to cover all possible use cases, without having to change `Flux` -source code for each new use case. +writing a custom `kstatus` reader for each CRD, we need to provide a way for the user +to express the conditions that need to be met in order to determine the status. 
+And we need to do this in a way that is flexible enough to cover all possible use cases, +without having to change Flux source code for each new use case. ### Goals @@ -48,15 +43,15 @@ source code for each new use case. ### Non-Goals -- We do not plan to support custom `healthChecks` for core resources. +- We do not plan to support custom health checks for Kubernetes core resources. ## Proposal -### Introduce a new field `CustomHealthChecksExprs` in the `Kustomization` CRD +### Introduce a new field `HealthCheckExprs` in the `Kustomization` CRD -The `CustomHealthChecksExprs` field will be a list of `CustomHealthCheck` objects. -Each `CustomHealthChecksExprs` object will have a `apiVersion`, `kind`, `inProgress`, -`failed` and `current` fields. +The `HealthCheckExprs` field will be a list of `CustomHealthCheck` objects. +The `CustomHealthCheck` object fields would be: `apiVersion`, `kind`, `inProgress`, +`failed` and `current`. To give an example, here is how we would declare a custom health check for a `Certificate` resource: @@ -67,7 +62,6 @@ apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: app-certificate - namespace: cert-manager spec: commonName: cert-manager-tls dnsNames: @@ -79,10 +73,6 @@ spec: group: cert-manager.io kind: ClusterIssuer name: app-issuer - privateKey: - algorithm: RSA - encoding: PKCS1 - size: 2048 secretName: app-tls-certs subject: organizations: @@ -95,170 +85,155 @@ This `Certificate` resource will transition through the following `conditions`: In order to compute the status of this resource, we need to look at both the `Issuing` and `Ready` conditions. 
-The resulting `Kustomization` object will look like this: +The Flux `Kustomization` object used to apply the `Certificate` will look like this: ```yaml -apiVersion: kustomize.toolkit.fluxcd.io/v1beta1 +apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: application-kustomization + name: certs spec: - force: false - interval: 5m0s - path: ./overlays/application - prune: false + interval: 5m + prune: true sourceRef: kind: GitRepository - name: application-git - healthChecks: + name: flux-system + path: ./certs + wait: true + healthCheckExprs: - apiVersion: cert-manager.io/v1 kind: Certificate - name: service-certificate - namespace: cert-manager - - apiVersion: apps/v1 - kind: Deployment - name: app - namespace: app - customHealthChecksExprs: - - apiVersion: cert-manager.io/v1 - kind: Certificate - inProgress: "status.conditions.filter(e, e.type == 'Issuing').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" - failed: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'False')" - current: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" + inProgress: "status.conditions.filter(e, e.type == 'Issuing').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" + failed: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'False')" + current: "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" ``` -The `HealthChecks` field still contains the objects that should be included in -the health assessment. The `CustomHealthChecksExprs` field will be used to declare -the `conditions` that need to be met in order to compute the status of the custom resource. 
+The `.spec.healthCheckExprs` field contains an entry for the `Certificate` kind, its `apiVersion`, +and the CEL expressions that need to be met in order to determine the health status of all custom resources +of this kind reconciled by the Flux `Kustomization`. -Note that all core resources are discarded from the `CustomHealthChecksExprs` field. +Note that all the Kubernetes core resources are discarded from the `healthCheckExprs` list. +### User Stories -#### Provide an evaluator for `CEL` expressions for users - -We will provide a CEL environment that can be used by the user to evaluate `CEL` -expressions. Users will use it to test their expressions before applying them to -their `Kustomization` object. +#### Configure health checks for non-standard custom resources -```shell -$ flux eval --api-version cert-manager.io/v1 --kind Certificate --in-progress "status.conditions.filter(e, e.type == 'Issuing').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" --failed "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'False')" --current "status.conditions.filter(e, e.type == 'Ready').all(e, e.observedGeneration == metadata.generation && e.status == 'True')" --file ./custom_resource.yaml -``` +> As a Flux user, I want to be able to specify health checks for +> custom resources that don't have a Ready condition, so that I can be notified +> when the status of my resources transitions to a failed state based on the evaluation +> of a different condition. -### User Stories +Using `.spec.healthCheckExprs`, Flux users have the ability to +specify the conditions that need to be met in order to determine the status of +a custom resource. This enables Flux to query any `.status` field, +besides the standard `Ready` condition, and evaluate it using a CEL expression. 
-#### Configure custom health checks for a custom resource +#### Use Flux dependencies for Kubernetes ClusterAPI -> As a user of Flux, I want to be able to specify custom health checks for my -> custom resources, so that I can have more control over the status of my -> resources. +> As a Flux user, I want to be able to use Flux dependencies bases on the +> readiness of ClusterAPI resources, so that I can ensure that my applications +> are deployed only when the ClusterAPI resources are ready. -#### Enable health checks support in Flux for non-standard resources +The ClusterAPI resources have a `Ready` condition, but this is set in the status +after the cluster is first created. Given this behavior, at creation time, Flux +cannot find any condition to evaluate the status of the ClusterAPI resources, +thus it considers them as static resources which are always ready. -> As a user of Flux, I want to be able to use the health check feature for -> non-standard resources, so that I can have more control over the status of my -> resources. +Using `.spec.healthCheckExprs`, Flux users can specify that the `Cluster` +kind is expected to have a `Ready` condition which will force Flux into waiting +for the ClusterAPI resources status to be populated. ### Alternatives We need an expression language that is flexible enough to cover all possible use -cases, without having to change `Flux` source code for each new use case. +cases, without having to change Flux source code for each new use case. -On alternative that have been considered is to use `cuelang` instead of `CEL`. -`cuelang` is a more powerful expression language, but it is also more complex and -requires more work to integrate with `Flux`. it also does not have any support in -`Kubernetes` yet while `CEL` is already used in `Kubernetes` and libraries are -available to use it. +An alternative that have been considered was to use `CUE` instead of `CEL`. 
+`CUE` lang is a more powerful expression language, but given the fact that +Kubernetes makes use of `CEL` for CRD validation and admission control, +we have decided to also use `CEL` in Flux in order to be consistent with +the Kubernetes ecosystem. ## Design Details -### Introduce a new field `CustomHealthChecksExprs` in the `Kustomization` CRD +### Introduce a new field `HealthCheckExprs` in the `Kustomization` CRD -The `api/v1/kustomization_types.go` file will be updated to add the `CustomHealthChecksExprs` +The `api/v1/kustomization_types.go` file will be updated to add the `HealthCheckExprs` field to the `KustomizationSpec` struct. ```go type KustomizationSpec struct { -... - // A list of resources to be included in the health assessment. - // +optional - HealthChecks []meta.NamespacedObjectKindReference `json:"healthChecks,omitempty"` - - // A list of custom health checks expressed as CEL expressions. - // The CEL expression must evaluate to a boolean value. // +optional - CustomHealthChecksExprs []CustomHealthCheckExprs `json:"customHealthChecksExprs,omitempty"` -... + HealthCheckExprs []CustomHealthCheck `json:"healthCheckExprs,omitempty"` } -// CustomHealthCheckExprs defines the CEL expressions for custom health checks. -// The CEL expressions must evaluate to a boolean value. The expressions are used -// to determine the status of the custom resource. -type CustomHealthCheckExprs struct { - // apiVersion of the custom health check. +type CustomHealthCheck struct { + // APIVersion of the custom resource under evaluation. // +required APIVersion string `json:"apiVersion"` - // Kind of the custom health check. + // Kind of the custom resource under evaluation. // +required Kind string `json:"kind"` - // InProgress is the CEL expression that verifies that the status - // of the custom resource is in progress. 
- // +optional - InProgress string `json:"inProgress"` - // Failed is the CEL expression that verifies that the status - // of the custom resource is failed. + // Current is the CEL expression that determines if the status + // of the custom resource has reached the desired state. + // +required + Current string `json:"current"` + // InProgress is the CEL expression that determines if the status + // of the custom resource has not yet reached the desired state. // +optional - Failed string `json:"failed"` - // Current is the CEL expression that verifies that the status - // of the custom resource is ready. + InProgress string `json:"inProgress,omitempty"` + // Failed is the CEL expression that determines if the status + // of the custom resource has failed to reach the desired state. // +optional - Current string `json:"current"` + Failed string `json:"failed,omitempty"` } ``` ### Introduce a generic custom status reader -Introduce a generic custom status reader that will be able to compute the status of -a custom resource based on a list of `conditions` that need to be met. 
+We'll introduce a `StatusReader` that will be used to compute the status +of custom resources based on the `CEL` expressions provided in the `CustomHealthCheck`: ```go import ( "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/cli-utils/pkg/kstatus/polling/engine" - "sigs.k8s.io/cli-utils/pkg/kstatus/polling/event" - kstatusreaders "sigs.k8s.io/cli-utils/pkg/kstatus/polling/statusreaders" + "github.com/fluxcd/cli-utils/pkg/kstatus/polling/engine" + "github.com/fluxcd/cli-utils/pkg/kstatus/polling/event" + kstatusreaders "github.com/fluxcd/cli-utils/pkg/kstatus/polling/statusreaders" ) -type customGenericStatusReader struct { + +type CELStatusReader struct { genericStatusReader engine.StatusReader gvk schema.GroupVersionKind } -func NewCustomGenericStatusReader(mapper meta.RESTMapper, gvk schema.GroupVersionKind, exprs map[string]string) engine.StatusReader { +func NewCELStatusReader(mapper meta.RESTMapper, gvk schema.GroupVersionKind, exprs map[string]string) engine.StatusReader { genericStatusReader := kstatusreaders.NewGenericStatusReader(mapper, genericConditions(gvk.Kind, exprs)) - return &customJobStatusReader{ + return &CELStatusReader{ genericStatusReader: genericStatusReader, - gvk: gvk, + gvk: gvk, } } -func (g *customGenericStatusReader) Supports(gk schema.GroupKind) bool { +func (g *CELStatusReader) Supports(gk schema.GroupKind) bool { return gk == g.gvk.GroupKind() } -func (g *customGenericStatusReader) ReadStatus(ctx context.Context, reader engine.ClusterReader, resource object.ObjMetadata) (*event.ResourceStatus, error) { +func (g *CELStatusReader) ReadStatus(ctx context.Context, reader engine.ClusterReader, resource object.ObjMetadata) (*event.ResourceStatus, error) { return g.genericStatusReader.ReadStatus(ctx, reader, resource) } -func (g *customGenericStatusReader) ReadStatusForObject(ctx context.Context, reader engine.ClusterReader, resource *unstructured.Unstructured) (*event.ResourceStatus, error) { +func (g *CELStatusReader) 
ReadStatusForObject(ctx context.Context, reader engine.ClusterReader, resource *unstructured.Unstructured) (*event.ResourceStatus, error) { return g.genericStatusReader.ReadStatusForObject(ctx, reader, resource) } ``` -A `genericConditions` closure will takes a `kind` and a map of `CEL` expressions as parameters +The `genericConditions` function will take a `kind` and a map of `CEL` expressions as parameters and returns a function that takes an `Unstructured` object and returns a `status.Result` object. ````go import ( - "sigs.k8s.io/cli-utils/pkg/kstatus/status" + "github.com/fluxcd/cli-utils/pkg/kstatus/status" "github.com/fluxcd/pkg/runtime/cel" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) @@ -268,63 +243,28 @@ func genericConditions(kind string, exprs map[string]string) func(u *unstructure obj := u.UnstructuredContent() for statusKey, expr := range exprs { - // Use CEL to evaluate the expression + // Use CEL to evaluate the expression result, err := cel.ProcessExpr(expr, obj) if err != nil { return nil, err } switch statusKey { case status.CurrentStatus.String(): - // If the expression evaluates to true, we return the current status + // If the expression evaluates to true, we return the current status case status.FailedStatus.String(): - // If the expression evaluates to true, we return the failed status + // If the expression evaluates to true, we return the failed status case status.InProgressStatus.String(): - // If the expression evaluates to true, we return the reconciling status + // If the expression evaluates to true, we return the reconciling status } } } } ```` -The generic status reader will be used by the `statusPoller` provided to the `reconciler` -to compute the status of the resources for the registered custom resources `kind`. - -We will provide a `CEL` environment that will use the Kubernetes CEL library to -evaluate the `CEL` expressions. 
- -### StatusPoller configuration - -The `reconciler` holds a `statusPoller` that is used to compute the status of the -resources during the `healthCheck` phase of the reconciliation. The `statusPoller` -is configured with a list of `statusReaders` that are used to compute the status -of the resources. - -The `statusPoller` is not configurable once instantiated. This means -that we cannot add new `statusReaders` to the `statusPoller` once it is created. -This is a problem for custom resources because we need to be able to add new -`statusReaders` for each new custom resource that is declared in the `Kustomization` -object's `customHealthChecksExprs` field. Fortunately, the `cli-utils` library has -been forked in the `fluxcd` organization and we can make a change to the `statusPoller` -exposed the `statusReaders` field so that we can add new `statusReaders` to it. - - -The `statusPoller` used by `kustomize-controller` will be updated for every reconciliation -in order to add new polling options for custom resources that have a `CustomHealthChecksExprs` -field defined in their `Kustomization` object. - -### K8s CEL Library - -The `K8s CEL Library` is a library that provides `CEL` functions to help in evaluating -`CEL` expressions on `Kubernetes` objects. - -Unfortunately, this means that we will need to follow the `K8s CEL Library` releases -in order to make sure that we are using the same version of the `CEL` library as -`Kubernetes`. As of the time of writing this RFC, the `K8s CEL Library` is using the -`v0.16.1` version of the `CEL` library while the latest version of the `CEL` library -is `v0.18.2`. This means that we will need to use the `v0.16.1` version of the `CEL` -library in order to be able to use the `K8s CEL Library`. +The CEL status reader will be used by the `statusPoller` provided to the kustomize-controller `reconciler` +to compute the status of the resources for the registered custom resources GVKs. 
+We will implement a `CEL` environment that will use the Kubernetes CEL library to evaluate the `CEL` expressions. ## Implementation History -See current POC implementation under https://github.com/souleb/kustomize-controller/tree/cel-based-custom-health From f604d7f34255460ae69ed52cecae9c5e828e7f1a Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Fri, 17 Jan 2025 11:39:10 +0200 Subject: [PATCH 3/8] Add Custom Health Check Library to RFC Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index 0942b835..d6809312 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -38,8 +38,8 @@ without having to change Flux source code for each new use case. ### Goals -- provide a generic solution for user to customize the health check of custom resources -- support non-standard resources in `kustomize-controller` +- Provide a generic solution for users to customise the health check evaluation of custom resources. +- Provide a space for the community to contribute custom health checks for popular custom resources. ### Non-Goals @@ -114,6 +114,14 @@ of this kind reconciled by the Flux `Kustomization`. Note that all the Kubernetes core resources are discarded from the `healthCheckExprs` list. +### Custom Health Check Library + +To help users define custom health checks, we will provide on the [fluxcd.io](https://fluxcd.io) +website a library of custom health checks for popular custom resources. + +The Flux community will be able to contribute to this library by submitting pull requests +to the `fluxcd/website` repository. 
+ ### User Stories #### Configure health checks for non-standard custom resources From e51e5df9da23cb74df635c90adff3c370a2de237 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Fri, 17 Jan 2025 12:09:10 +0200 Subject: [PATCH 4/8] Add `SealedSecret` example to RFC Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index d6809312..33a9f832 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -136,6 +136,16 @@ specify the conditions that need to be met in order to determine the status of a custom resource. This enables Flux to query any `.status` field, besides the standard `Ready` condition, and evaluate it using a CEL expression. +Example for `SealedSecret` which has a `Synced` condition: + +```yaml + - apiVersion: bitnami.com/v1alpha1 + kind: SealedSecret + inProgress: "metadata.generation != status.observedGeneration" + failed: "status.conditions.filter(e, e.type == 'Synced').all(e, e.status == 'False')" + current: "status.conditions.filter(e, e.type == 'Synced').all(e, e.status == 'True')" +``` + #### Use Flux dependencies for Kubernetes ClusterAPI > As a Flux user, I want to be able to use Flux dependencies bases on the From 6f6d3fb26929c73ae3879fbe5157d26e1cd69d81 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Fri, 17 Jan 2025 12:23:41 +0200 Subject: [PATCH 5/8] Add ClusterAPI example to RFC Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index 33a9f832..911a20c7 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -161,6 +161,16 @@ Using `.spec.healthCheckExprs`, Flux users can specify that the `Cluster` kind is 
expected to have a `Ready` condition which will force Flux into waiting for the ClusterAPI resources status to be populated. +Example for `Cluster`: + +```yaml + - apiVersion: cluster.x-k8s.io/v1beta1 + kind: Cluster + inProgress: "metadata.generation != status.observedGeneration" + failed: "status.conditions.filter(e, e.type == 'Ready').all(e, e.status == 'False')" + current: "status.conditions.filter(e, e.type == 'Ready').all(e, e.status == 'True')" +``` + ### Alternatives We need an expression language that is flexible enough to cover all possible use From 74d748f5479da78be705dea3b61f5fb4bb24cd89 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Fri, 17 Jan 2025 15:11:21 +0200 Subject: [PATCH 6/8] Explain the evaluation logic based on conditions Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 28 +++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index 911a20c7..305b5100 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -33,8 +33,8 @@ condition to know if the certificate has been issued or not before looking at th In order to provide a generic solution for custom resources, that would not imply writing a custom `kstatus` reader for each CRD, we need to provide a way for the user to express the conditions that need to be met in order to determine the status. -And we need to do this in a way that is flexible enough to cover all possible use cases, -without having to change Flux source code for each new use case. +It should be done in a way that is flexible enough to cover all possible use cases, +without having to change Flux source code for each new CRD. ### Goals @@ -120,7 +120,7 @@ To help users define custom health checks, we will provide on the [fluxcd.io](ht website a library of custom health checks for popular custom resources. 
The Flux community will be able to contribute to this library by submitting pull requests -to the `fluxcd/website` repository. +to the [fluxcd/website](https://github.com/fluxcd/website) repository. ### User Stories @@ -176,7 +176,7 @@ Example for `Cluster`: We need an expression language that is flexible enough to cover all possible use cases, without having to change Flux source code for each new use case. -An alternative that have been considered was to use `CUE` instead of `CEL`. +An alternative that has been considered was to use `CUE` instead of `CEL`. `CUE` lang is a more powerful expression language, but given the fact that Kubernetes makes use of `CEL` for CRD validation and admission control, we have decided to also use `CEL` in Flux in order to be consistent with @@ -217,6 +217,22 @@ type CustomHealthCheck struct { } ``` +If a CEL expression evaluation results in an error, for example looking for a field that does not exist, +the health check will fail. Users will be encouraged to test their expressions +in the [CEL Playground](https://playcel.undistro.io/). Here is where the community maintained +[library](#custom-health-check-library) will be super useful as some of the expressions might be complex. + +The `InProgress` expression is optional, when not specified the controller will determine +if the resource is in progress if both `Failed` and `Current` evaluate to `false`. +Moreover, if the `InProgress` expression is not specified and the custom resource has a +`.status.observedGeneration` field, the controller with compare it with the `.metadata.generation` +field to determine if the resource is in progress. + +The `Failed` expression is optional, when not specified the controller will keep evaluating the +`Current` expression until it returns `true`, and will give up after the timeout is reached. +Users will be encouraged to provide a `Failed` expression to avoid staling the reconciliation +loop until the timeout is reached. 
+ ### Introduce a generic custom status reader We'll Introduce a `StatusReader` that will be used to compute the status @@ -274,7 +290,7 @@ func genericConditions(kind string, exprs map[string]string) func(u *unstructure // Use CEL to evaluate the expression result, err := cel.ProcessExpr(expr, obj) if err != nil { - return nil, err + // handle error } switch statusKey { case status.CurrentStatus.String(): @@ -285,6 +301,8 @@ func genericConditions(kind string, exprs map[string]string) func(u *unstructure // If the expression evaluates to true, we return the reconciling status } } + + // If none of the expressions evaluate to true, we return the reconciling status } } ```` From e4325961af3b1fcfeb0d2875d7a14144114fbcd8 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Wed, 22 Jan 2025 09:27:35 +0200 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Matheus Pimenta Signed-off-by: Stefan Prodan --- rfcs/0000-custom-health-checks/README.md | 48 ++++++++++++------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0000-custom-health-checks/README.md index 305b5100..52cb4686 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0000-custom-health-checks/README.md @@ -12,7 +12,7 @@ This RFC proposes to extend the Flux `Kustomization` API with custom health chec custom resources using the Common Expression Language (CEL). In order to provide flexibility, we propose to use CEL expressions for defining the -conditions that need to be met in order to determine the status of a custom resource. +conditions that need to be met in order to determine the health of a custom resource. We will introduce a new field called `healthCheckExprs` in the `Kustomization` CRD which will be a list of CEL expressions for evaluating the status of a particular Kubernetes resource kind. @@ -26,15 +26,14 @@ and custom resources that comply with the `kstatus` conventions. 
There are cases where the status of a custom resource does not follow the `kstatus` conventions. For example, we might want to compute the status of a custom resource based on a condition other than `Ready`. This is the case for resources -that do intermediate patching like `Certificate` where you should look at the `Issued` -condition to know if the certificate has been issued or not before looking at the +that perform intermediary patching like `Certificate` from cert-manager, where one should look +at the `Issued` condition to know if the certificate has been issued or not before looking at the `Ready` condition. -In order to provide a generic solution for custom resources, that would not imply -writing a custom `kstatus` reader for each CRD, we need to provide a way for the user -to express the conditions that need to be met in order to determine the status. -It should be done in a way that is flexible enough to cover all possible use cases, -without having to change Flux source code for each new CRD. +We need to provide a way for users to express the conditions that need to be +met in order to determine the health of a custom resource. We seek a solution +flexible enough to cover all possible use cases that does not require changing +the Flux source code for each new CRD. ### Goals @@ -53,8 +52,7 @@ The `HealthCheckExprs` field will be a list of `CustomHealthCheck` objects. The `CustomHealthCheck` object fields would be: `apiVersion`, `kind`, `inProgress`, `failed` and `current`. -To give an example, here is how we would declare a custom health check for a `Certificate` -resource: +For example, consider the following `Certificate` resource: ```yaml --- @@ -112,7 +110,7 @@ The `.spec.healthCheckExprs` field contains an entry for the `Certificate` kind, and the CEL expressions that need to be met in order to determine the health status of all custom resources of this kind reconciled by the Flux `Kustomization`. 
-Note that all the Kubernetes core resources are discarded from the `healthCheckExprs` list. +Note that no Kubernetes core resources match the `healthCheckExprs` list. ### Custom Health Check Library @@ -132,7 +130,7 @@ to the [fluxcd/website](https://github.com/fluxcd/website) repository. > of a different condition. Using `.spec.healthCheckExprs`, Flux users have the ability to -specify the conditions that need to be met in order to determine the status of +specify the conditions that need to be met in order to determine the health of a custom resource. This enables Flux to query any `.status` field, besides the standard `Ready` condition, and evaluate it using a CEL expression. @@ -153,7 +151,7 @@ Example for `SealedSecret` which has a `Synced` condition: > are deployed only when the ClusterAPI resources are ready. The ClusterAPI resources have a `Ready` condition, but this is set in the status -after the cluster is first created. Given this behavior, at creation time, Flux +after the cluster is first created. Given this behavior, at creation time Flux cannot find any condition to evaluate the status of the ClusterAPI resources, thus it considers them as static resources which are always ready. @@ -222,15 +220,15 @@ the health check will fail. Users will be encouraged to test their expressions in the [CEL Playground](https://playcel.undistro.io/). Here is where the community maintained [library](#custom-health-check-library) will be super useful as some of the expressions might be complex. -The `InProgress` expression is optional, when not specified the controller will determine -if the resource is in progress if both `Failed` and `Current` evaluate to `false`. +The `InProgress` expression is optional, when not specified the controller will determine +if the resource is in progress if both `Failed` and `Current` do not evaluate to `true`. 
Moreover, if the `InProgress` expression is not specified and the custom resource has a `.status.observedGeneration` field, the controller with compare it with the `.metadata.generation` field to determine if the resource is in progress. The `Failed` expression is optional, when not specified the controller will keep evaluating the `Current` expression until it returns `true`, and will give up after the timeout is reached. -Users will be encouraged to provide a `Failed` expression to avoid staling the reconciliation +Users will be encouraged to provide a `Failed` expression to avoid stalling the reconciliation loop until the timeout is reached. ### Introduce a generic custom status reader @@ -240,10 +238,10 @@ of custom resources based on the `CEL` expressions provided in the `CustomHealth ```go import ( - "k8s.io/apimachinery/pkg/runtime/schema" - "github.com/fluxcd/cli-utils/pkg/kstatus/polling/engine" - "github.com/fluxcd/cli-utils/pkg/kstatus/polling/event" - kstatusreaders "github.com/fluxcd/cli-utils/pkg/kstatus/polling/statusreaders" + "k8s.io/apimachinery/pkg/runtime/schema" + "github.com/fluxcd/cli-utils/pkg/kstatus/polling/engine" + "github.com/fluxcd/cli-utils/pkg/kstatus/polling/event" + kstatusreaders "github.com/fluxcd/cli-utils/pkg/kstatus/polling/statusreaders" ) type CELStatusReader struct { @@ -283,7 +281,7 @@ import ( ) func genericConditions(kind string, exprs map[string]string) func(u *unstructured.Unstructured) (*status.Result, error) { - return func(u *unstructured.Unstructured) (*status.Result, error) { + return func(u *unstructured.Unstructured) (*status.Result, error) { obj := u.UnstructuredContent() for statusKey, expr := range exprs { @@ -294,15 +292,15 @@ func genericConditions(kind string, exprs map[string]string) func(u *unstructure } switch statusKey { case status.CurrentStatus.String(): - // If the expression evaluates to true, we return the current status + // If the expression evaluates to true, we return the Current status case 
status.FailedStatus.String(): - // If the expression evaluates to true, we return the failed status + // If the expression evaluates to true, we return the Failed status case status.InProgressStatus.String(): - // If the expression evaluates to true, we return the reconciling status + // If the expression evaluates to true, we return the InProgress status } } - // If none of the expressions evaluate to true, we return the reconciling status + // If none of the expressions evaluate to true, we return the InProgress status } } ```` From 9e6f72343669c095a29bd20d24a316bfd382170f Mon Sep 17 00:00:00 2001 From: Matheus Pimenta Date: Wed, 22 Jan 2025 14:29:59 +0000 Subject: [PATCH 8/8] Clarify expression evaluation logic Signed-off-by: Matheus Pimenta --- .../README.md | 111 ++++++++++-------- 1 file changed, 64 insertions(+), 47 deletions(-) rename rfcs/{0000-custom-health-checks => 0009-custom-health-checks}/README.md (77%) diff --git a/rfcs/0000-custom-health-checks/README.md b/rfcs/0009-custom-health-checks/README.md similarity index 77% rename from rfcs/0000-custom-health-checks/README.md rename to rfcs/0009-custom-health-checks/README.md index 52cb4686..ef143a2a 100644 --- a/rfcs/0000-custom-health-checks/README.md +++ b/rfcs/0009-custom-health-checks/README.md @@ -1,10 +1,10 @@ -# RFC-0000 Custom Health Checks for Kustomization using Common Expression Language(CEL) +# RFC-0009 Custom Health Checks for Kustomization using Common Expression Language (CEL) -**Status:** provisional +**Status:** implementable **Creation date:** 2024-01-05 -**Last update:** 2025-01-17 +**Last update:** 2025-01-23 ## Summary @@ -19,16 +19,16 @@ Kubernetes resource kind. ## Motivation -Flux uses the `kstatus` library during the health check phase to compute owned +Flux uses the `kstatus` library during the health check phase to compute owned resources status. This works just fine for all the Kubernetes core resources and custom resources that comply with the `kstatus` conventions. 
There are cases where the status of a custom resource does not follow the `kstatus` conventions. For example, we might want to compute the status of a custom resource based on a condition other than `Ready`. This is the case for resources -that perform intermediary patching like `Certificate` from cert-manager, where one should look -at the `Issued` condition to know if the certificate has been issued or not before looking at the -`Ready` condition. +that perform intermediary patching, like `Certificate` from cert-manager, where one +should look at the `Issuing` condition to know if the certificate is being issued or +not before looking at the `Ready` condition. We need to provide a way for users to express the conditions that need to be met in order to determine the health of a custom resource. We seek a solution @@ -77,7 +77,7 @@ spec: - example.com ``` -This `Certificate` resource will transition through the following `conditions`: +This `Certificate` resource will transition through the following `conditions`: `Issuing` and `Ready`. In order to compute the status of this resource, we need to look at both the `Issuing` @@ -110,15 +110,13 @@ The `.spec.healthCheckExprs` field contains an entry for the `Certificate` kind, and the CEL expressions that need to be met in order to determine the health status of all custom resources of this kind reconciled by the Flux `Kustomization`. -Note that no Kubernetes core resources match the `healthCheckExprs` list. - ### Custom Health Check Library To help users define custom health checks, we will provide on the [fluxcd.io](https://fluxcd.io) website a library of custom health checks for popular custom resources. The Flux community will be able to contribute to this library by submitting pull requests -to the [fluxcd/website](https://github.com/fluxcd/website) repository. +to the [fluxcd/website](https://github.com/fluxcd/website) repository. 
### User Stories @@ -139,14 +137,13 @@ Example for `SealedSecret` which has a `Synced` condition: ```yaml - apiVersion: bitnami.com/v1alpha1 kind: SealedSecret - inProgress: "metadata.generation != status.observedGeneration" failed: "status.conditions.filter(e, e.type == 'Synced').all(e, e.status == 'False')" current: "status.conditions.filter(e, e.type == 'Synced').all(e, e.status == 'True')" ``` #### Use Flux dependencies for Kubernetes ClusterAPI -> As a Flux user, I want to be able to use Flux dependencies bases on the +> As a Flux user, I want to be able to use Flux dependencies based on the > readiness of ClusterAPI resources, so that I can ensure that my applications > are deployed only when the ClusterAPI resources are ready. @@ -164,7 +161,6 @@ Example for `Cluster`: ```yaml - apiVersion: cluster.x-k8s.io/v1beta1 kind: Cluster - inProgress: "metadata.generation != status.observedGeneration" failed: "status.conditions.filter(e, e.type == 'Ready').all(e, e.status == 'False')" current: "status.conditions.filter(e, e.type == 'Ready').all(e, e.status == 'True')" ``` @@ -200,6 +196,11 @@ type CustomHealthCheck struct { // Kind of the custom resource under evaluation. // +required Kind string `json:"kind"` + + HealthCheckExpressions `json:",inline"` +} + +type HealthCheckExpressions struct { // Current is the CEL expression that determines if the status // of the custom resource has reached the desired state. // +required @@ -215,19 +216,26 @@ type CustomHealthCheck struct { } ``` -If a CEL expression evaluation results in an error, for example looking for a field that does not exist, +If a CEL expression evaluation results in an error, for example, looking for a field that does not exist, the health check will fail. Users will be encouraged to test their expressions -in the [CEL Playground](https://playcel.undistro.io/). Here is where the community maintained +in the [CEL Playground](https://playcel.undistro.io/). 
Here is where the community-maintained [library](#custom-health-check-library) will be super useful as some of the expressions might be complex. -The `InProgress` expression is optional, when not specified the controller will determine -if the resource is in progress if both `Failed` and `Current` do not evaluate to `true`. -Moreover, if the `InProgress` expression is not specified and the custom resource has a -`.status.observedGeneration` field, the controller with compare it with the `.metadata.generation` -field to determine if the resource is in progress. +The evaluation logic will be as follows. + +First, we check if the custom resource has a `.status.observedGeneration` field; if it does, +we compare it with the `.metadata.generation` field to determine if the custom resource is in +progress. We consider it in progress if these fields differ, and don't evaluate any of the +expressions if that's the case. However, if these fields are equal, there's no immediate +conclusion about the health of the custom resource, so we proceed with the evaluation. + +For each of the `InProgress`, `Failed` and `Current` expressions, we evaluate the expressions +that are specified (`InProgress` and `Failed` are optional) in this specific order and return +the respective status of the first expression that evaluates to `true`. If none of the +expressions evaluate to `true`, we consider the custom resource in progress. -The `Failed` expression is optional, when not specified the controller will keep evaluating the -`Current` expression until it returns `true`, and will give up after the timeout is reached. +When the `Failed` expression is not specified, the controller will keep evaluating the +`Current` expression until it returns `true`, and will give up after the timeout defined in the Kustomization's `spec.timeout` field is reached. Users will be encouraged to provide a `Failed` expression to avoid stalling the reconciliation loop until the timeout is reached. 
@@ -242,6 +250,8 @@ import ( "github.com/fluxcd/cli-utils/pkg/kstatus/polling/engine" "github.com/fluxcd/cli-utils/pkg/kstatus/polling/event" kstatusreaders "github.com/fluxcd/cli-utils/pkg/kstatus/polling/statusreaders" + + kustomizev1 "github.com/fluxcd/kustomize-controller/api/v1" ) type CELStatusReader struct { @@ -249,7 +259,9 @@ type CELStatusReader struct { gvk schema.GroupVersionKind } -func NewCELStatusReader(mapper meta.RESTMapper, gvk schema.GroupVersionKind, exprs map[string]string) engine.StatusReader { +func NewCELStatusReader(mapper meta.RESTMapper, gvk schema.GroupVersionKind, + exprs *kustomizev1.HealthCheckExpressions) engine.StatusReader { + genericStatusReader := kstatusreaders.NewGenericStatusReader(mapper, genericConditions(gvk.Kind, exprs)) return &CELStatusReader{ genericStatusReader: genericStatusReader, @@ -270,40 +282,45 @@ func (g *CELStatusReader) ReadStatusForObject(ctx context.Context, reader engine } ``` -The `genericConditions` function will take a `kind` and a map of `CEL` expressions as parameters -and returns a function that takes an `Unstructured` object and returns a `status.Result` object. +The `genericConditions` function takes the set of `CEL` expressions and returns a +function that takes an `Unstructured` object and returns a `status.Result` object. 
-````go +```go import ( - "github.com/fluxcd/cli-utils/pkg/kstatus/status" - "github.com/fluxcd/pkg/runtime/cel" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "github.com/fluxcd/cli-utils/pkg/kstatus/status" + "github.com/fluxcd/pkg/runtime/cel" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -func genericConditions(kind string, exprs map[string]string) func(u *unstructured.Unstructured) (*status.Result, error) { +func genericConditions(exprs *kustomizev1.HealthCheckExpressions) func(u *unstructured.Unstructured) (*status.Result, error) { return func(u *unstructured.Unstructured) (*status.Result, error) { obj := u.UnstructuredContent() - for statusKey, expr := range exprs { - // Use CEL to evaluate the expression - result, err := cel.ProcessExpr(expr, obj) - if err != nil { - // handle error - } - switch statusKey { - case status.CurrentStatus.String(): - // If the expression evaluates to true, we return the Current status - case status.FailedStatus.String(): - // If the expression evaluates to true, we return the Failed status - case status.InProgressStatus.String(): - // If the expression evaluates to true, we return the InProgress status + // if status.observedGeneration exists and differs from metadata.generation return status.InProgressStatus + + for _, e := range []struct{ + expr string + status status.Status + }{ + {expr: exprs.InProgress, status: status.InProgressStatus}, + {expr: exprs.Failed, status: status.FailedStatus}, + {expr: exprs.Current, status: status.CurrentStatus}, + } { + if e.expr != "" { + result, err := cel.EvaluateBooleanExpr(e.expr, obj) + if err != nil { + return nil, err + } + if result { + return &status.Result{Status: e.status}, nil } } } - - // If none of the expressions evaluate to true, we return the InProgress status + + return &status.Result{Status: status.InProgressStatus}, nil } } -```` +``` The CEL status reader will be used by the `statusPoller` provided to the kustomize-controller `reconciler` to compute the status of the resources 
for the registered custom resources GVKs.