From ba36b37ac13a8c2a6e83af660079b02bb1245351 Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Thu, 8 Aug 2024 15:36:25 +0200 Subject: [PATCH] flux diff artifact: Provide a non-semantic diff option. Artifacts may contain other files types, not just YAML files, meaning the semantic YAML diff provided by `dyff` is not a safe default. This change implements purely textual diffing using the `diff` command line tool. This tool can be overridden by users using the `FLUX_EXTERNAL_DIFF` environment variable. Users that store Kubernetes resource manifests in the artifact can re-enable the semantic YAML diff behavior using the `--semantic-diff yaml` flag. The arguments to the diff subcommand may be: * A directory * A .tar.gz or .tgz file * An OCI url * An individual file The two arguments to the command are treated the same way, allowing users to diff in either direction. Signed-off-by: Florian Forster --- cmd/flux/diff_artifact.go | 354 +++++++++++++++++++++++++++++++------- go.mod | 4 +- 2 files changed, 292 insertions(+), 66 deletions(-) diff --git a/cmd/flux/diff_artifact.go b/cmd/flux/diff_artifact.go index 0c5d4f99..25a8dfad 100644 --- a/cmd/flux/diff_artifact.go +++ b/cmd/flux/diff_artifact.go @@ -17,32 +17,43 @@ limitations under the License. package main import ( + "archive/tar" "bytes" + "compress/gzip" "context" "errors" "fmt" "io" "os" + "os/exec" + "path/filepath" + "sort" + "strings" oci "github.com/fluxcd/pkg/oci/client" sourcev1 "github.com/fluxcd/source-controller/api/v1beta2" "github.com/gonvenience/ytbx" + "github.com/google/shlex" "github.com/homeport/dyff/pkg/dyff" "github.com/spf13/cobra" + "golang.org/x/exp/maps" "github.com/fluxcd/flux2/v2/internal/flags" "github.com/fluxcd/flux2/v2/pkg/printers" ) -var ErrDiffArtifactChanged = errors.New("the remote and local artifact contents differ") +var ErrDiffArtifactChanged = errors.New("the artifact contents differ") var diffArtifactCmd = &cobra.Command{ - Use: "artifact", + Use: "artifact ", Short: "Diff Artifact", - Long: withPreviewNote(`The diff artifact command prints the diff between the remote OCI artifact and a local directory or file`), + Long: withPreviewNote(fmt.Sprintf( + "The diff artifact command prints the diff between the remote OCI artifact and a local directory or file.\n\n"+ + "You can overwrite the command used for diffing by setting the %q environment variable.", externalDiffVar)), Example: `# Check if local files differ from remote -flux diff artifact oci://ghcr.io/stefanprodan/manifests:podinfo:6.2.0 --path=./kustomize`, +flux diff artifact oci://ghcr.io/stefanprodan/manifests:podinfo:6.2.0 ./kustomize`, RunE: diffArtifactCmdRun, + Args: cobra.RangeArgs(1, 2), } type diffArtifactFlags struct { @@ -51,38 +62,62 @@ type diffArtifactFlags struct { provider flags.SourceOCIProvider ignorePaths []string brief bool + differ *semanticDiffFlag } var diffArtifactArgs = newDiffArtifactArgs() func newDiffArtifactArgs() diffArtifactFlags { + defaultDiffer := mustExternalDiff() + return diffArtifactFlags{ provider: flags.SourceOCIProvider(sourcev1.GenericOCIProvider), + + differ: &semanticDiffFlag{ + options: map[string]differ{ + "yaml": dyffBuiltin{ + opts: []dyff.CompareOption{ + dyff.IgnoreOrderChanges(false), + dyff.KubernetesEntityDetection(true), + }, + }, + "false": defaultDiffer, + }, + value: "false", + differ: defaultDiffer, + }, } } func init() { - diffArtifactCmd.Flags().StringVar(&diffArtifactArgs.path, "path", "", "path to the directory or file containing the Kubernetes manifests, or '-' to read from STDIN") + diffArtifactCmd.Flags().StringVar(&diffArtifactArgs.path, "path", "", "path to the directory or file containing the Kubernetes manifests (deprecated, use a second positional argument instead)") diffArtifactCmd.Flags().StringVar(&diffArtifactArgs.creds, "creds", "", "credentials for OCI registry in the format [:] if --provider is generic") diffArtifactCmd.Flags().Var(&diffArtifactArgs.provider, "provider", sourceOCIRepositoryArgs.provider.Description()) diffArtifactCmd.Flags().StringSliceVar(&diffArtifactArgs.ignorePaths, "ignore-paths", excludeOCI, "set paths to ignore in .gitignore format") - diffArtifactCmd.Flags().BoolVarP(&diffArtifactArgs.brief, "brief", "q", false, "Just print a line when the resources differ. Does not output a list of changes.") + diffArtifactCmd.Flags().BoolVarP(&diffArtifactArgs.brief, "brief", "q", false, "just print a line when the resources differ; does not output a list of changes") + diffArtifactCmd.Flags().Var(diffArtifactArgs.differ, "semantic-diff", "use a semantic diffing algorithm") + diffCmd.AddCommand(diffArtifactCmd) } func diffArtifactCmdRun(cmd *cobra.Command, args []string) error { + var from, to string + if len(args) < 1 { return fmt.Errorf("artifact URL is required") } - ociURL := args[0] + from = args[0] - if diffArtifactArgs.path == "" { - return errors.New("the '--path' flag is required") - } + switch { + case len(args) >= 2: + to = args[1] - url, err := oci.ParseArtifactURL(ociURL) - if err != nil { - return err + case diffArtifactArgs.path != "": + // for backwards compatibility + to = diffArtifactArgs.path + + default: + return errors.New("a second artifact is required") } ctx, cancel := context.WithTimeout(context.Background(), rootArgs.timeout) @@ -104,12 +139,20 @@ func diffArtifactCmdRun(cmd *cobra.Command, args []string) error { return fmt.Errorf("provider not supported: %w", err) } - if err := ociClient.LoginWithProvider(ctx, url, ociProvider); err != nil { - return fmt.Errorf("error during login with provider: %w", err) + if url, err := oci.ParseArtifactURL(from); err == nil { + if err := ociClient.LoginWithProvider(ctx, url, ociProvider); err != nil { + return fmt.Errorf("error during login with provider: %w", err) + } + } + + if url, err := oci.ParseArtifactURL(to); err == nil { + if err := ociClient.LoginWithProvider(ctx, url, ociProvider); err != nil { + return fmt.Errorf("error during login with provider: %w", err) + } } } - diff, err := diffArtifact(ctx, ociClient, url, diffArtifactArgs.path) + diff, err := diffArtifact(ctx, ociClient, from, to, diffArtifactArgs) if err != nil { return err } @@ -123,98 +166,281 @@ func diffArtifactCmdRun(cmd *cobra.Command, args []string) error { cmd.Print(diff) } - return fmt.Errorf("%q and %q: %w", ociURL, diffArtifactArgs.path, ErrDiffArtifactChanged) + return fmt.Errorf("%q and %q: %w", from, to, ErrDiffArtifactChanged) } -func diffArtifact(ctx context.Context, client *oci.Client, remoteURL, localPath string) (string, error) { - localFile, err := loadLocal(localPath) +func diffArtifact(ctx context.Context, client *oci.Client, from, to string, flags diffArtifactFlags) (string, error) { + fromDir, fromCleanup, err := loadArtifact(ctx, client, from) if err != nil { return "", err } + defer fromCleanup() - remoteFile, cleanup, err := loadRemote(ctx, client, remoteURL) + toDir, toCleanup, err := loadArtifact(ctx, client, to) if err != nil { return "", err } - defer cleanup() + defer toCleanup() - report, err := dyff.CompareInputFiles(remoteFile, localFile, - dyff.IgnoreOrderChanges(false), - dyff.KubernetesEntityDetection(true), - ) - if err != nil { - return "", fmt.Errorf("dyff.CompareInputFiles(): %w", err) + return flags.differ.Diff(ctx, fromDir, toDir) +} + +// loadArtifact ensures that the artifact is in a local directory that can be +// recursively diffed. If necessary, files are downloaded, extracted, and/or +// copied into temporary directories for this purpose. +func loadArtifact(ctx context.Context, client *oci.Client, path string) (dir string, cleanup func(), err error) { + fi, err := os.Stat(path) + if err == nil && fi.IsDir() { + return path, func() {}, nil } - if len(report.Diffs) == 0 { - return "", nil + if err == nil && fi.Mode().IsRegular() { + return loadArtifactFile(path) } - var buf bytes.Buffer + url, err := oci.ParseArtifactURL(path) + if err == nil { + return loadArtifactOCI(ctx, client, url) + } - if err := printers.NewDyffPrinter().Print(&buf, report); err != nil { - return "", fmt.Errorf("formatting dyff report: %w", err) + return "", nil, fmt.Errorf("%q: %w", path, os.ErrNotExist) +} + +// loadArtifactOCI pulls the remove artifact into a temporary directory. +func loadArtifactOCI(ctx context.Context, client *oci.Client, url string) (dir string, cleanup func(), err error) { + tmpDir, err := os.MkdirTemp("", "flux-diff-artifact") + if err != nil { + return "", nil, fmt.Errorf("could not create temporary directory: %w", err) } - return buf.String(), nil + cleanup = func() { + if err := os.RemoveAll(tmpDir); err != nil { + fmt.Fprintf(os.Stderr, "os.RemoveAll(%q): %v\n", tmpDir, err) + } + } + + if _, err := client.Pull(ctx, url, tmpDir); err != nil { + cleanup() + return "", nil, fmt.Errorf("Pull(%q): %w", url, err) + } + + return tmpDir, cleanup, nil } -func loadLocal(path string) (ytbx.InputFile, error) { - if ytbx.IsStdin(path) { - buf, err := io.ReadAll(os.Stdin) +// loadArtifactFile copies a file into a temporary directory to allow for recursive diffing. +// If path is a .tar.gz or .tgz file, the archive is extracted into a temporary directory. +// Otherwise the file is copied verbatim. +func loadArtifactFile(path string) (dir string, cleanup func(), err error) { + tmpDir, err := os.MkdirTemp("", "flux-diff-artifact") + if err != nil { + return "", nil, fmt.Errorf("could not create temporary directory: %w", err) + } + + cleanup = func() { + if err := os.RemoveAll(tmpDir); err != nil { + fmt.Fprintf(os.Stderr, "os.RemoveAll(%q): %v\n", tmpDir, err) + } + } + + if strings.HasSuffix(path, ".tar.gz") || strings.HasSuffix(path, ".tgz") { + if err := extractTo(path, tmpDir); err != nil { + cleanup() + return "", nil, err + } + } else { + fh, err := os.Open(path) if err != nil { - return ytbx.InputFile{}, fmt.Errorf("os.ReadAll(os.Stdin): %w", err) + cleanup() + return "", nil, fmt.Errorf("os.Open(%q): %w", path, err) } + defer fh.Close() - nodes, err := ytbx.LoadDocuments(buf) + name := filepath.Join(tmpDir, filepath.Base(path)) + if err := copyFile(fh, name); err != nil { + cleanup() + return "", nil, fmt.Errorf("os.Open(%q): %w", path, err) + } + } + + return tmpDir, cleanup, nil +} + +// extractTo extracts the .tar.gz / .tgz archive at archivePath into the destDir directory. +func extractTo(archivePath, destDir string) error { + archiveFH, err := os.Open(archivePath) + if err != nil { + return err + } + + gzipReader, err := gzip.NewReader(archiveFH) + if err != nil { + return fmt.Errorf("gzip.NewREader(): %w", err) + } + + tarReader := tar.NewReader(gzipReader) + + for { + header, err := tarReader.Next() + if errors.Is(err, io.EOF) { + break + } if err != nil { - return ytbx.InputFile{}, fmt.Errorf("ytbx.LoadDocuments(): %w", err) + return fmt.Errorf("tarReader.Next(): %w", err) } - return ytbx.InputFile{ - Location: "STDIN", - Documents: nodes, - }, nil + switch header.Typeflag { + case tar.TypeDir: + dir := filepath.Join(destDir, header.Name) + if err := os.Mkdir(dir, 0755); err != nil { + return fmt.Errorf("os.Mkdir(%q): %w", dir, err) + } + + case tar.TypeReg: + name := filepath.Join(destDir, header.Name) + if err := copyFile(tarReader, name); err != nil { + return fmt.Errorf("extracting %q: %w", header.Name, err) + } + + default: + logger.Warningf("unsupported tar type: %v", header.Typeflag) + } } - sb, err := os.Stat(path) + return nil +} + +func copyFile(from io.Reader, to string) error { + fh, err := os.Create(to) if err != nil { - return ytbx.InputFile{}, err + return fmt.Errorf("os.Create(%q): %w", to, err) } + defer fh.Close() - if sb.IsDir() { - return ytbx.LoadDirectory(path) + if _, err := io.Copy(fh, from); err != nil { + return fmt.Errorf("io.Copy(%q): %w", to, err) } - return ytbx.LoadFile(path) + return nil +} + +type differ interface { + // Diff compares the two local directories "to" and "from" and returns their differences, or an empty string if they are equal. + Diff(ctx context.Context, from, to string) (string, error) +} + +// externalDiffCommand implements the differ interface using an external diff command. +type externalDiffCommand struct { + name string + flags []string } -func loadRemote(ctx context.Context, client *oci.Client, url string) (ytbx.InputFile, func(), error) { - noopCleanup := func() {} +// externalDiffVar is the environment variable users can use to overwrite the external diff command. +const externalDiffVar = "FLUX_EXTERNAL_DIFF" - tmpDir, err := os.MkdirTemp("", "flux-diff-artifact") +// mustExternalDiff initializes an externalDiffCommand using the externalDiffVar environment variable. +func mustExternalDiff() externalDiffCommand { + cmdline := os.Getenv(externalDiffVar) + if cmdline == "" { + cmdline = "diff -ur" + } + + args, err := shlex.Split(cmdline) if err != nil { - return ytbx.InputFile{}, noopCleanup, fmt.Errorf("could not create temporary directory: %w", err) + panic(fmt.Sprintf("shlex.Split(%q): %v", cmdline, err)) } - cleanup := func() { - if err := os.RemoveAll(tmpDir); err != nil { - fmt.Fprintf(os.Stderr, "os.RemoveAll(%q): %v\n", tmpDir, err) - } + return externalDiffCommand{ + name: args[0], + flags: args[1:], } +} - if _, err := client.Pull(ctx, url, tmpDir); err != nil { - cleanup() - return ytbx.InputFile{}, noopCleanup, fmt.Errorf("Pull(%q): %w", url, err) +func (c externalDiffCommand) Diff(ctx context.Context, fromDir, toDir string) (string, error) { + var args []string + + args = append(args, c.flags...) + args = append(args, fromDir, toDir) + + cmd := exec.CommandContext(ctx, c.name, args...) + + var stdout bytes.Buffer + + cmd.Stdout = &stdout + cmd.Stderr = os.Stderr + + err := cmd.Run() + + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + // exit code 1 only means there was a difference => ignore + } else if err != nil { + return "", fmt.Errorf("executing %q: %w", c.name, err) } - inputFile, err := ytbx.LoadDirectory(tmpDir) + return stdout.String(), nil +} + +// dyffBuiltin implements the differ interface using `dyff`, a semantic diff for YAML documents. +type dyffBuiltin struct { + opts []dyff.CompareOption +} + +func (d dyffBuiltin) Diff(ctx context.Context, fromDir, toDir string) (string, error) { + fromFile, err := ytbx.LoadDirectory(fromDir) if err != nil { - cleanup() - return ytbx.InputFile{}, noopCleanup, fmt.Errorf("ytbx.LoadDirectory(%q): %w", tmpDir, err) + return "", fmt.Errorf("ytbx.LoadDirectory(%q): %w", fromDir, err) } - inputFile.Location = url + toFile, err := ytbx.LoadDirectory(toDir) + if err != nil { + return "", fmt.Errorf("ytbx.LoadDirectory(%q): %w", toDir, err) + } + + report, err := dyff.CompareInputFiles(fromFile, toFile, d.opts...) + if err != nil { + return "", fmt.Errorf("dyff.CompareInputFiles(): %w", err) + } + + if len(report.Diffs) == 0 { + return "", nil + } + + var buf bytes.Buffer + + if err := printers.NewDyffPrinter().Print(&buf, report); err != nil { + return "", fmt.Errorf("formatting dyff report: %w", err) + } + + return buf.String(), nil +} + +// semanticDiffFlag implements pflag.Value for choosing a semantic diffing algorithm. +type semanticDiffFlag struct { + options map[string]differ + value string + differ +} + +func (f *semanticDiffFlag) Set(s string) error { + d, ok := f.options[s] + if !ok { + return fmt.Errorf("invalid value: %q", s) + } + + f.value = s + f.differ = d + + return nil +} + +func (f *semanticDiffFlag) String() string { + return f.value +} + +func (f *semanticDiffFlag) Type() string { + keys := maps.Keys(f.options) + + sort.Strings(keys) - return inputFile, cleanup, nil + return strings.Join(keys, "|") } diff --git a/go.mod b/go.mod index 86b00a7c..fe76555a 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( github.com/gonvenience/ytbx v1.4.4 github.com/google/go-cmp v0.6.0 github.com/google/go-containerregistry v0.20.2 + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/hashicorp/go-cleanhttp v0.5.2 github.com/homeport/dyff v1.7.1 github.com/lucasb-eyer/go-colorful v1.2.0 @@ -50,6 +51,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/theckman/yacspin v0.13.12 golang.org/x/crypto v0.26.0 + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 golang.org/x/term v0.23.0 golang.org/x/text v0.17.0 k8s.io/api v0.31.0 @@ -144,7 +146,6 @@ require ( github.com/google/go-github/v64 v64.0.0 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/handlers v1.5.2 // indirect github.com/gorilla/mux v1.8.1 // indirect @@ -232,7 +233,6 @@ require ( go.opentelemetry.io/otel/trace v1.28.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.starlark.net v0.0.0-20231121155337-90ade8b19d09 // indirect - golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/net v0.28.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect golang.org/x/sync v0.8.0 // indirect