From 31f414a6ea32b25dee4bf5acd688971bf8618d57 Mon Sep 17 00:00:00 2001 From: Alejandro Menocal Date: Tue, 28 Apr 2026 18:14:21 -0500 Subject: [PATCH 1/4] create pkgs for external consumption --- README.md | 54 ++- cmd/root.go | 9 +- go.mod | 2 +- internal/archive/archive.go | 85 ---- internal/archive/archive_test.go | 111 ----- internal/commitremap/commitremap.go | 129 ----- internal/commitremap/commitremap_test.go | 86 ---- pkg/archive/archive.go | 277 +++++++++++ pkg/archive/archive_test.go | 572 +++++++++++++++++++++++ pkg/commitremap/commitremap.go | 133 ++++++ pkg/commitremap/commitremap_test.go | 292 ++++++++++++ 11 files changed, 1331 insertions(+), 419 deletions(-) delete mode 100644 internal/archive/archive.go delete mode 100644 internal/archive/archive_test.go delete mode 100644 internal/commitremap/commitremap.go delete mode 100644 internal/commitremap/commitremap_test.go create mode 100644 pkg/archive/archive.go create mode 100644 pkg/archive/archive_test.go create mode 100644 pkg/commitremap/commitremap.go create mode 100644 pkg/commitremap/commitremap_test.go diff --git a/README.md b/README.md index a74def6..fb260b7 100644 --- a/README.md +++ b/README.md @@ -38,4 +38,56 @@ old new ... ``` -You can provide a directory where your migration archive has been expanded, or a `.tar.gz` file, via the `--migration-archive` argument. In either case, this tool will produce a new migration archive in the current directory, appending `-REMAPPED` to the base filename of the original migration archive. \ No newline at end of file +You can provide a directory where your migration archive has been expanded, or a `.tar.gz` file, via the `--migration-archive` argument. In either case, this tool will produce a new migration archive in the current directory, appending `-REMAPPED` to the base filename of the original migration archive. + +## Using as a library + +The remap and archive packaging logic is also available as a Go library under `pkg/`: + +```go +package main + +import ( + "log" + + "github.com/mona-actions/gh-commit-remap/pkg/archive" + "github.com/mona-actions/gh-commit-remap/pkg/commitremap" +) + +func main() { + // 1. Extract a .tar.gz migration archive (auto-creates destDir). + extracted, err := archive.UnTar("migration-archive.tar.gz", "") + if err != nil { + log.Fatal(err) + } + + // 2. Parse the commit-map produced by `git filter-repo`. + commitMap, err := commitremap.ParseCommitMap("commit-map") + if err != nil { + log.Fatal(err) + } + + // 3. Rewrite SHAs in the archive's metadata JSON files. + if err := commitremap.ProcessFiles(extracted, commitremap.DefaultPrefixes(), commitMap); err != nil { + log.Fatal(err) + } + + // 4. Re-package to a path of your choosing. + if err := archive.ReTarDir(extracted, "migration-archive-REMAPPED.tar.gz"); err != nil { + log.Fatal(err) + } +} +``` + +### Known limitations + +- **Whole-string SHA match only.** `ProcessFiles` only rewrites JSON string + values that exactly equal a commit-map key. SHAs embedded in URLs + (`/commits/`), markdown bodies, or `_links.*.href` are not rewritten. + Tracked as a follow-up issue. +- **Limited prefix coverage.** `DefaultPrefixes` covers `pull_requests`, + `issues`, `issue_events`. Other archive files containing SHAs + (`commit_comments_*.json`, `pull_request_review*_*.json`, + `releases_*.json`'s `target_commitish`, etc.) are not processed by default. + Pass a custom `prefixes` slice to `ProcessFiles` to widen coverage, or + track the follow-up issue for a default-list expansion. \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index f84b1c7..ae0628e 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,8 +8,8 @@ import ( "os" "strings" - "github.com/mona-actions/gh-commit-remap/internal/archive" - "github.com/mona-actions/gh-commit-remap/internal/commitremap" + "github.com/mona-actions/gh-commit-remap/pkg/archive" + "github.com/mona-actions/gh-commit-remap/pkg/commitremap" "github.com/spf13/cobra" ) @@ -34,9 +34,6 @@ var rootCmd = &cobra.Command{ log.Fatalf("Error parsing commit map: %v", err) } - // config to define the types of files to process - types := []string{"pull_requests", "issues", "issue_events"} - archivePath, _ := cmd.Flags().GetString("migration-archive") var extractedDir string @@ -53,7 +50,7 @@ var rootCmd = &cobra.Command{ extractedDir = archivePath } - if err := commitremap.ProcessFiles(extractedDir, types, commitMap); err != nil { + if err := commitremap.ProcessFiles(extractedDir, commitremap.DefaultPrefixes(), commitMap); err != nil { log.Fatal(err) } else { // Re-package the modified directory into a new archive diff --git a/go.mod b/go.mod index 9f088f5..e7cf800 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/mona-actions/gh-commit-remap -go 1.21 +go 1.24 require github.com/spf13/cobra v1.8.1 diff --git a/internal/archive/archive.go b/internal/archive/archive.go deleted file mode 100644 index 9f0e6f1..0000000 --- a/internal/archive/archive.go +++ /dev/null @@ -1,85 +0,0 @@ -package archive - -import ( - "fmt" - "os" - "os/exec" - "path/filepath" - "strings" -) - -// reTarFiles creates a new tar archive from the files in the given directory. -// The name of the archive is the same as the directory name. -func ReTar(archivePath string) (string, error) { - // Extract the directory name from the archivePath - dirName := filepath.Base(archivePath) - - // Create the name of the new archive with -REMAPPED suffix - archiveName := fmt.Sprintf("%s-REMAPPED.tar.gz", dirName) - - err := checkTarAvailability() - if err != nil { - return "", err - } - - // Create and run the tar command - tarCmd := exec.Command("tar", "-czf", archiveName, "-C", archivePath, ".") - err = tarCmd.Run() - if err != nil { - return "", fmt.Errorf("error re-tarring the files: %w", err) - } - - return archiveName, nil -} - -// UnTar extracts the given .tar.gz archive into the specified destination directory. -// If destDir is empty, a directory will be created in the current working directory -// using the archive base name (without the .tar.gz suffix). It returns the directory -// where the archive was extracted. -func UnTar(archiveFile, destDir string) (string, error) { - if archiveFile == "" { - return "", fmt.Errorf("archive file path is empty") - } - - if err := checkTarAvailability(); err != nil { - return "", err - } - - // Ensure the archive exists before attempting extraction - if _, err := os.Stat(archiveFile); err != nil { - return "", fmt.Errorf("cannot stat archive file: %w", err) - } - - // Determine destination directory if not provided - if destDir == "" { - base := filepath.Base(archiveFile) - // strip .tar.gz if present - base = strings.TrimSuffix(base, ".tar.gz") - if base == filepath.Base(archiveFile) { // no .tar.gz suffix - base = strings.TrimSuffix(base, ".tgz") - } - if base == "" { - base = "extracted" - } - destDir = base - } - - // Create destination directory if it does not exist - if err := os.MkdirAll(destDir, 0o755); err != nil { - return "", fmt.Errorf("failed to create destination directory: %w", err) - } - - // Run tar extraction - tarCmd := exec.Command("tar", "-xzf", archiveFile, "-C", destDir) - if err := tarCmd.Run(); err != nil { - return "", fmt.Errorf("error extracting archive: %w", err) - } - - return destDir, nil -} - -// checkTarAvailability checks if the 'tar' command is available in the system's PATH. -func checkTarAvailability() error { - _, err := exec.LookPath("tar") - return err -} diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go deleted file mode 100644 index 8634723..0000000 --- a/internal/archive/archive_test.go +++ /dev/null @@ -1,111 +0,0 @@ -package archive - -import ( - "bytes" - "os" - "os/exec" - "path/filepath" - "testing" -) - -func TestReTar(t *testing.T) { - // Create a temporary directory - tempDir, err := os.MkdirTemp("", "test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tempDir) - - // Create a file in the temporary directory - tempFile := filepath.Join(tempDir, "testfile") - origContent := []byte("test") - if err := os.WriteFile(tempFile, origContent, 0644); err != nil { - t.Fatalf("Failed to write temp file: %v", err) - } - - // Call the function under test - tarFile, err := ReTar(tempDir) - if err != nil { - t.Fatalf("ReTar failed: %v", err) - } - - // Check if the tar file was created - if _, err := os.Stat(tarFile); os.IsNotExist(err) { - t.Fatalf("Tar file was not created: %v", err) - } - - // Ensure the tar file is removed after the test - defer os.Remove(tarFile) - - // Extract the tar file to a new directory - extractDir, err := os.MkdirTemp("", "extract") - if err != nil { - t.Fatalf("Failed to create extract directory: %v", err) - } - defer os.RemoveAll(extractDir) - - tarCmd := exec.Command("tar", "-xzf", tarFile, "-C", extractDir) - err = tarCmd.Run() - if err != nil { - t.Fatalf("Failed to extract tar file: %v", err) - } - - // Read the contents of the extracted file - extractedFile := filepath.Join(extractDir, "testfile") - extractedContent, err := os.ReadFile(extractedFile) - if err != nil { - t.Fatalf("Failed to read extracted file: %v", err) - } - - // Compare the contents of the original file and the extracted file - if !bytes.Equal(origContent, extractedContent) { - t.Fatalf("Original file content and extracted file content do not match") - } -} - -func TestUnTar(t *testing.T) { - // Setup: create temp dir with a file, tar it using ReTar, then UnTar into new dir - srcDir, err := os.MkdirTemp("", "untar-src") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(srcDir) - - content := []byte("hello world") - if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), content, 0o644); err != nil { - t.Fatalf("failed to write file: %v", err) - } - - archiveName, err := ReTar(srcDir) - if err != nil { - t.Fatalf("ReTar failed: %v", err) - } - defer os.Remove(archiveName) - - // Extract using UnTar with empty destination (auto-generated) - destDir, err := UnTar(archiveName, "") - if err != nil { - t.Fatalf("UnTar failed: %v", err) - } - defer os.RemoveAll(destDir) - - extractedContent, err := os.ReadFile(filepath.Join(destDir, "file.txt")) - if err != nil { - t.Fatalf("failed to read extracted file: %v", err) - } - if !bytes.Equal(content, extractedContent) { - t.Fatalf("extracted content mismatch") - } -} - -func TestUnTarErrors(t *testing.T) { - // Non-existent archive - if _, err := UnTar("does-not-exist.tar.gz", ""); err == nil { - t.Fatalf("expected error for non-existent archive") - } - - // Empty archive path - if _, err := UnTar("", ""); err == nil { - t.Fatalf("expected error for empty archive path") - } -} diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go deleted file mode 100644 index 27e6ef0..0000000 --- a/internal/commitremap/commitremap.go +++ /dev/null @@ -1,129 +0,0 @@ -package commitremap - -import ( - "encoding/json" - "fmt" - "log" - "os" - "path/filepath" - "strings" -) - -// Struct to represent a single entry in the commit map -type CommitMapEntry struct { - Old string - New string -} - -// Parses the file and returns a map of old commit hashes to new commit hashes -func ParseCommitMap(filePath string) (*[]CommitMapEntry, error) { - commitMap := []CommitMapEntry{} - - // Read the commit-map file - content, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - // Split the file content into lines - lines := strings.Split(string(content), "\n") - - // Iterate over the lines and parse the old and new commit hashes - for _, line := range lines { - if strings.TrimSpace(line) == "" { - continue - } - - fields := strings.Fields(line) - if len(fields) != 2 { - return nil, fmt.Errorf("invalid line: %s", line) - } - - commitMap = append(commitMap, CommitMapEntry{ - Old: fields[0], - New: fields[1], - }) - } - return &commitMap, nil -} - -func ProcessFiles(archiveLocation string, prefixes []string, commitMap *[]CommitMapEntry) error { - - for _, prefix := range prefixes { - // Get a list of all files that match the pattern - files, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) - if err != nil { - log.Fatalf("Error getting files: %v", err) - } - - // Process each file - for _, file := range files { - log.Println("Processing file:", file) - - err := updateMetadataFile(file, commitMap) - if err != nil { - return fmt.Errorf("Error updating metadata file: %v; %v", file, err) - } - } - } - return nil -} - -func updateMetadataFile(filePath string, commitMap *[]CommitMapEntry) error { - // Read the JSON file - data, err := os.ReadFile(filePath) - if err != nil { - return fmt.Errorf("Error reading data: %v", err) - } - - var dataMap interface{} - err = json.Unmarshal(data, &dataMap) - if err != nil { - return fmt.Errorf("Error unmarshaling data: %v", err) - } - - // Iterate over the commit map and replace the old commit hashes with the new ones - for _, commit := range *commitMap { - replaceSHA(dataMap, commit.Old, commit.New) - } - - // Marshal the updated data to JSON and pretty print it - updatedData, err := json.MarshalIndent(dataMap, "", " ") - if err != nil { - return fmt.Errorf("Error marshaling updated data: %v", err) - } - - // Overwrite the original file with the updated data - err = os.WriteFile(filePath, updatedData, 0644) - if err != nil { - return fmt.Errorf("Error writing updated data: %v", err) - } - - return nil -} - -func replaceSHA(data interface{}, oldSHA string, newSHA string) { - if data == nil { - return - } - - switch v := data.(type) { - case map[string]interface{}: - for key, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[key] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } - } - case []interface{}: - for i, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[i] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } - } - default: - // Unsupported type, do nothing - } -} diff --git a/internal/commitremap/commitremap_test.go b/internal/commitremap/commitremap_test.go deleted file mode 100644 index 550dc13..0000000 --- a/internal/commitremap/commitremap_test.go +++ /dev/null @@ -1,86 +0,0 @@ -package commitremap - -import ( - "os" - "testing" -) - -func TestParseCommitMap(t *testing.T) { - tests := []struct { - name string - fileContent string - expected *[]CommitMapEntry - expectError bool - }{ - { - name: "Valid commit map", - fileContent: `oldSHA1 newSHA1 -oldSHA2 newSHA2 -oldSHA3 newSHA3`, - expected: &[]CommitMapEntry{ - {Old: "oldSHA1", New: "newSHA1"}, - {Old: "oldSHA2", New: "newSHA2"}, - {Old: "oldSHA3", New: "newSHA3"}, - }, - expectError: false, - }, - { - name: "Empty file", - fileContent: ``, - expected: &[]CommitMapEntry{}, - expectError: false, - }, - { - name: "Invalid line format", - fileContent: `oldSHA1 newSHA1 -invalidLine -oldSHA2 newSHA2`, - expected: nil, - expectError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create a temporary file - tmpfile, err := os.CreateTemp("", "commitmap") - if err != nil { - t.Fatalf("Failed to create temp file: %v", err) - } - defer os.Remove(tmpfile.Name()) - - // Write the test content to the temporary file - if _, err := tmpfile.WriteString(tt.fileContent); err != nil { - t.Fatalf("Failed to write to temp file: %v", err) - } - if err := tmpfile.Close(); err != nil { - t.Fatalf("Failed to close temp file: %v", err) - } - - // Call the function under test - result, err := ParseCommitMap(tmpfile.Name()) - - // Check for expected error - if tt.expectError { - if err == nil { - t.Fatalf("Expected error but got none") - } - return - } else { - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - // Check the result - if len(*result) != len(*tt.expected) { - t.Fatalf("Expected %d entries, got %d", len(*tt.expected), len(*result)) - } - for i, entry := range *result { - if entry.Old != (*tt.expected)[i].Old || entry.New != (*tt.expected)[i].New { - t.Errorf("Expected entry %d to be %+v, got %+v", i, (*tt.expected)[i], entry) - } - } - }) - } -} diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go new file mode 100644 index 0000000..ab82d4c --- /dev/null +++ b/pkg/archive/archive.go @@ -0,0 +1,277 @@ +package archive + +import ( + "archive/tar" + "compress/gzip" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strings" +) + +// UnTar decompresses a .tar.gz file into destDir, returning the directory +// containing the extracted contents. If destDir is empty, a directory named +// after the archive (with .tar.gz or .tgz stripped) is created in the current +// working directory — note that this default-naming behavior is asymmetric +// with ReTarDir, which always requires an explicit outPath. destDir is +// created if it does not exist, and empty, ".", and relative destination paths +// are accepted. Extraction is sandboxed with os.Root: a destination path that +// is itself a symlink is allowed as the sandbox root, but pre-existing symlinks +// inside the destination tree are rejected as security boundary violations. +// UnTar returns an error if the archive cannot be opened, is not valid gzip/tar +// data, contains entries that escape destDir or use absolute paths, or contains +// unsupported entry types such as symlinks, devices, FIFOs, or hard links. +func UnTar(archiveFile, destDir string) (string, error) { + if archiveFile == "" { + return "", fmt.Errorf("archive file path is empty") + } + + if _, err := os.Stat(archiveFile); err != nil { + return "", fmt.Errorf("cannot stat archive file: %w", err) + } + + file, err := os.Open(archiveFile) + if err != nil { + return "", fmt.Errorf("cannot open archive file: %w", err) + } + defer file.Close() + + gzipReader, err := gzip.NewReader(file) + if err != nil { + return "", fmt.Errorf("error creating gzip reader: %w", err) + } + defer gzipReader.Close() + + if destDir == "" { + base := filepath.Base(archiveFile) + base = strings.TrimSuffix(base, ".tar.gz") + if base == filepath.Base(archiveFile) { + base = strings.TrimSuffix(base, ".tgz") + } + if base == "" { + base = "extracted" + } + destDir = base + } + + if err := os.MkdirAll(destDir, 0o755); err != nil { + return "", fmt.Errorf("failed to create destination directory: %w", err) + } + root, err := os.OpenRoot(destDir) + if err != nil { + return "", fmt.Errorf("failed to open destination root: %w", err) + } + defer root.Close() + + tarReader := tar.NewReader(gzipReader) + + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("error reading tar entry: %w", err) + } + + if filepath.IsAbs(header.Name) { + return "", fmt.Errorf("archive entry %q has absolute path", header.Name) + } + if pathHasParentRef(header.Name) { + return "", fmt.Errorf("archive entry %q escapes destination", header.Name) + } + relPath := filepath.ToSlash(filepath.Clean(header.Name)) + + switch header.Typeflag { + case tar.TypeDir: + if err := root.MkdirAll(relPath, header.FileInfo().Mode().Perm()); err != nil { + return "", fmt.Errorf("failed to create directory %q: %w", relPath, err) + } + case tar.TypeReg: + if header.Size < 0 { + return "", fmt.Errorf("archive entry %q has negative size", header.Name) + } + parent := filepath.ToSlash(filepath.Dir(relPath)) + if err := root.MkdirAll(parent, 0o755); err != nil { + return "", fmt.Errorf("failed to create parent directory for %q: %w", relPath, err) + } + outFile, err := root.OpenFile(relPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, header.FileInfo().Mode().Perm()) + if err != nil { + return "", fmt.Errorf("failed to create file %q: %w", relPath, err) + } + _, copyErr := io.CopyN(outFile, tarReader, header.Size) + closeErr := outFile.Close() + if copyErr != nil { + return "", fmt.Errorf("failed to extract file %q: %w", relPath, copyErr) + } + if closeErr != nil { + return "", fmt.Errorf("failed to close file %q: %w", relPath, closeErr) + } + default: + return "", fmt.Errorf("unsupported tar entry type %d for %q", header.Typeflag, header.Name) + } + } + + return destDir, nil +} + +func pathHasParentRef(name string) bool { + for _, part := range strings.Split(filepath.ToSlash(name), "/") { + if part == ".." { + return true + } + } + return false +} + +// ReTarDir creates a .tar.gz archive at outPath from the contents of srcDir. +// The parent directory for outPath is created if it does not exist, and an +// existing outPath is overwritten. outPath must not resolve inside srcDir; an +// error is returned otherwise. +// +// Archive entries use a "./" prefix and a top-level "./" directory entry to +// match the byte-layout of the legacy `tar -czf {archive} -C {srcDir} .` shell +// command. This parity exists so consumers that compare archives byte-for-byte +// (e.g. CI pipelines diffing against archives produced by the previous +// shell-based implementation, or downstream importers like the GEI migration +// importer that historically consumed shell-tar output) keep working without +// modification. Standards-compliant tar readers treat "./foo" and "foo" +// identically; the prefix is purely about layout parity, not semantics. +// +// ReTarDir returns an error if srcDir cannot be read, is not a directory, +// outPath cannot be created, or any file cannot be written to the archive. +func ReTarDir(srcDir, outPath string) error { + info, err := os.Stat(srcDir) + if err != nil { + return fmt.Errorf("cannot stat source directory: %w", err) + } + if !info.IsDir() { + return fmt.Errorf("source path %q is not a directory", srcDir) + } + + absSrc, err := filepath.Abs(srcDir) + if err != nil { + return fmt.Errorf("failed to resolve source directory: %w", err) + } + absOut, err := filepath.Abs(outPath) + if err != nil { + return fmt.Errorf("failed to resolve archive path: %w", err) + } + absSrc = filepath.Clean(absSrc) + absOut = filepath.Clean(absOut) + if absOut == absSrc || strings.HasPrefix(absOut, absSrc+string(os.PathSeparator)) { + return fmt.Errorf("output path %q must not be inside source directory %q", outPath, srcDir) + } + + if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil { + return fmt.Errorf("failed to create archive parent directory: %w", err) + } + + outFile, err := os.Create(outPath) + if err != nil { + return fmt.Errorf("failed to create archive: %w", err) + } + defer func() { _ = outFile.Close() }() + + gzipWriter := gzip.NewWriter(outFile) + tarWriter := tar.NewWriter(gzipWriter) + + // Match legacy `tar -czf -C srcDir .` layout: emit a top-level "./" directory + // entry. We reuse tar.FileInfoHeader (same path used for child entries) so the + // root header is populated consistently (Uid/Gid/AccessTime/Format etc.) rather + // than being a hand-rolled subset. + rootHeader, err := tar.FileInfoHeader(info, "") + if err != nil { + return fmt.Errorf("failed to create root tar header: %w", err) + } + rootHeader.Name = "./" + if err := tarWriter.WriteHeader(rootHeader); err != nil { + return fmt.Errorf("failed to write root tar header: %w", err) + } + + walkErr := filepath.WalkDir(srcDir, func(path string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if path == srcDir { + return nil + } + // outPath is guaranteed to be outside srcDir by the up-front guard above, + // so filepath.WalkDir(srcDir, …) cannot yield it. The guard plus + // TestReTarDir_OutputInsideSrcDir / TestReTarDir_OutputEqualsSrcDir are the + // regression tripwire — no in-walk skip needed. + + info, err := entry.Info() + if err != nil { + return fmt.Errorf("failed to read file info for %q: %w", path, err) + } + + relPath, err := filepath.Rel(srcDir, path) + if err != nil { + return fmt.Errorf("failed to calculate archive path for %q: %w", path, err) + } + tarPath := filepath.ToSlash(relPath) + + header, err := tar.FileInfoHeader(info, "") + if err != nil { + return fmt.Errorf("failed to create tar header for %q: %w", path, err) + } + header.Name = "./" + tarPath + if info.IsDir() { + header.Name += "/" + } + + if err := tarWriter.WriteHeader(header); err != nil { + return fmt.Errorf("failed to write tar header for %q: %w", path, err) + } + + if !info.Mode().IsRegular() { + return nil + } + + inFile, err := os.Open(path) + if err != nil { + return fmt.Errorf("failed to open source file %q: %w", path, err) + } + _, copyErr := io.Copy(tarWriter, inFile) + closeErr := inFile.Close() + if copyErr != nil { + return fmt.Errorf("failed to write file %q to archive: %w", path, copyErr) + } + if closeErr != nil { + return fmt.Errorf("failed to close source file %q: %w", path, closeErr) + } + return nil + }) + if walkErr != nil { + return fmt.Errorf("failed to walk source directory: %w", walkErr) + } + + if err := tarWriter.Close(); err != nil { + return fmt.Errorf("failed to close tar writer: %w", err) + } + if err := gzipWriter.Close(); err != nil { + return fmt.Errorf("failed to close gzip writer: %w", err) + } + if err := outFile.Close(); err != nil { + return fmt.Errorf("failed to close archive: %w", err) + } + + return nil +} + +// ReTar preserves the legacy CLI behavior by writing +// -REMAPPED.tar.gz into the current working directory and +// returning that archive name. It returns an error if ReTarDir cannot create +// the archive. +// +// Deprecated: prefer ReTarDir. Will be removed in a future major version. +func ReTar(srcDir string) (string, error) { + archiveName := filepath.Base(srcDir) + "-REMAPPED.tar.gz" + if err := ReTarDir(srcDir, archiveName); err != nil { + return "", fmt.Errorf("error re-tarring the files: %w", err) + } + return archiveName, nil +} diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go new file mode 100644 index 0000000..437cc5c --- /dev/null +++ b/pkg/archive/archive_test.go @@ -0,0 +1,572 @@ +package archive + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "io" + "io/fs" + "os" + "path/filepath" + "reflect" + "sort" + "strings" + "testing" +) + +func buildTarGz(t *testing.T, entries []tar.Header, contents map[string][]byte) []byte { + t.Helper() + + var buf bytes.Buffer + gzipWriter := gzip.NewWriter(&buf) + tarWriter := tar.NewWriter(gzipWriter) + + for _, entry := range entries { + if entry.Typeflag == 0 { + entry.Typeflag = tar.TypeReg + } + if entry.Typeflag == tar.TypeReg && entry.Size == 0 { + entry.Size = int64(len(contents[entry.Name])) + } + if err := tarWriter.WriteHeader(&entry); err != nil { + t.Fatalf("failed to write tar header: %v", err) + } + if content, ok := contents[entry.Name]; ok { + if _, err := tarWriter.Write(content); err != nil { + t.Fatalf("failed to write tar content: %v", err) + } + } + } + + if err := tarWriter.Close(); err != nil { + t.Fatalf("failed to close tar writer: %v", err) + } + if err := gzipWriter.Close(); err != nil { + t.Fatalf("failed to close gzip writer: %v", err) + } + + return buf.Bytes() +} + +func writeArchive(t *testing.T, dir, name string, data []byte) string { + t.Helper() + + path := filepath.Join(dir, name) + if err := os.WriteFile(path, data, 0o644); err != nil { + t.Fatalf("failed to write archive: %v", err) + } + return path +} + +func collectListing(t *testing.T, root string) []string { + t.Helper() + + var entries []string + if err := filepath.WalkDir(root, func(path string, entry fs.DirEntry, err error) error { + if err != nil { + return err + } + if path == root { + return nil + } + rel, err := filepath.Rel(root, path) + if err != nil { + return err + } + rel = filepath.ToSlash(rel) + if entry.IsDir() { + rel += "/" + } + entries = append(entries, rel) + return nil + }); err != nil { + t.Fatalf("failed to collect listing: %v", err) + } + sort.Strings(entries) + return entries +} + +func TestReTar(t *testing.T) { + srcDir := t.TempDir() + + origContent := []byte("test") + if err := os.WriteFile(filepath.Join(srcDir, "testfile"), origContent, 0o644); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + tarFile, err := ReTar(srcDir) + if err != nil { + t.Fatalf("ReTar failed: %v", err) + } + defer os.Remove(tarFile) + + if _, err := os.Stat(tarFile); err != nil { + t.Fatalf("tar file was not created: %v", err) + } + + extractDir := t.TempDir() + if _, err := UnTar(tarFile, extractDir); err != nil { + t.Fatalf("UnTar failed: %v", err) + } + + extractedContent, err := os.ReadFile(filepath.Join(extractDir, "testfile")) + if err != nil { + t.Fatalf("failed to read extracted file: %v", err) + } + + if !bytes.Equal(origContent, extractedContent) { + t.Fatalf("original file content and extracted file content do not match") + } +} + +func TestUnTar(t *testing.T) { + srcDir := t.TempDir() + + content := []byte("hello world") + if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), content, 0o644); err != nil { + t.Fatalf("failed to write file: %v", err) + } + + archiveName, err := ReTar(srcDir) + if err != nil { + t.Fatalf("ReTar failed: %v", err) + } + defer os.Remove(archiveName) + + destDir, err := UnTar(archiveName, "") + if err != nil { + t.Fatalf("UnTar failed: %v", err) + } + defer os.RemoveAll(destDir) + + extractedContent, err := os.ReadFile(filepath.Join(destDir, "file.txt")) + if err != nil { + t.Fatalf("failed to read extracted file: %v", err) + } + if !bytes.Equal(content, extractedContent) { + t.Fatalf("extracted content mismatch") + } +} + +func TestUnTarErrors(t *testing.T) { + if _, err := UnTar("does-not-exist.tar.gz", ""); err == nil { + t.Fatalf("expected error for non-existent archive") + } + + if _, err := UnTar("", ""); err == nil { + t.Fatalf("expected error for empty archive path") + } +} + +func TestUnTar_TarSlip(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "evil.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "../../etc/passwd", + Mode: 0o644, + Size: int64(len("evil")), + }}, map[string][]byte{"../../etc/passwd": []byte("evil")})) + + _, err := UnTar(archivePath, filepath.Join(t.TempDir(), "extract")) + if err == nil || !strings.Contains(err.Error(), "escapes destination") { + t.Fatalf("expected escapes destination error, got %v", err) + } +} + +func TestUnTar_AbsolutePath(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "evil.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "/tmp/evil", + Mode: 0o644, + Size: int64(len("evil")), + }}, map[string][]byte{"/tmp/evil": []byte("evil")})) + + _, err := UnTar(archivePath, filepath.Join(t.TempDir(), "extract")) + if err == nil || !strings.Contains(err.Error(), "absolute path") { + t.Fatalf("expected absolute path error, got %v", err) + } +} + +func TestUnTar_Symlink(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "symlink.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "link", + Mode: 0o777, + Typeflag: tar.TypeSymlink, + Linkname: "target", + }}, nil)) + + _, err := UnTar(archivePath, filepath.Join(t.TempDir(), "extract")) + if err == nil || !strings.Contains(err.Error(), "unsupported") { + t.Fatalf("expected unsupported error, got %v", err) + } +} + +func TestUnTar_NonGzip(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "plain.tar.gz", []byte("not gzip")) + + _, err := UnTar(archivePath, filepath.Join(t.TempDir(), "extract")) + if err == nil { + t.Fatalf("expected error for non-gzip archive") + } +} + +func TestUnTar_CorruptGzip(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "corrupt.tar.gz", []byte{0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 'g', 'a', 'r', 'b', 'a', 'g', 'e'}) + + _, err := UnTar(archivePath, filepath.Join(t.TempDir(), "extract")) + if err == nil { + t.Fatalf("expected error for corrupt gzip archive") + } +} + +func TestUnTar_EmptyArchive(t *testing.T) { + destDir := filepath.Join(t.TempDir(), "extract") + archivePath := writeArchive(t, t.TempDir(), "empty.tar.gz", buildTarGz(t, nil, nil)) + + returnedDir, err := UnTar(archivePath, destDir) + if err != nil { + t.Fatalf("UnTar failed: %v", err) + } + if returnedDir != destDir { + t.Fatalf("expected returned dir %q, got %q", destDir, returnedDir) + } + entries, err := os.ReadDir(destDir) + if err != nil { + t.Fatalf("failed to read dest dir: %v", err) + } + if len(entries) != 0 { + t.Fatalf("expected empty dest dir, got %d entries", len(entries)) + } +} + +func TestUnTar_PermissionsPreserved(t *testing.T) { + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "secret.txt"), []byte("secret"), 0o600); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + + archivePath := filepath.Join(t.TempDir(), "archive.tar.gz") + if err := ReTarDir(srcDir, archivePath); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + destDir := filepath.Join(t.TempDir(), "extract") + if _, err := UnTar(archivePath, destDir); err != nil { + t.Fatalf("UnTar failed: %v", err) + } + + info, err := os.Stat(filepath.Join(destDir, "secret.txt")) + if err != nil { + t.Fatalf("failed to stat extracted file: %v", err) + } + if got := info.Mode().Perm(); got != 0o600 { + t.Fatalf("expected mode 0600, got %o", got) + } +} + +func TestUnTar_DestDirAutoCreated(t *testing.T) { + archivePath := writeArchive(t, t.TempDir(), "one.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "file.txt", + Mode: 0o644, + Size: int64(len("content")), + }}, map[string][]byte{"file.txt": []byte("content")})) + destDir := filepath.Join(t.TempDir(), "nested", "path") + + if _, err := UnTar(archivePath, destDir); err != nil { + t.Fatalf("UnTar failed: %v", err) + } + if info, err := os.Stat(destDir); err != nil || !info.IsDir() { + t.Fatalf("expected dest dir to be created, info=%v err=%v", info, err) + } +} + +func TestUnTar_DotDestDir(t *testing.T) { + testUnTarRelativeDestDir(t, ".") +} + +func TestUnTar_DotSlashDestDir(t *testing.T) { + testUnTarRelativeDestDir(t, "./") +} + +func testUnTarRelativeDestDir(t *testing.T, destDir string) { + t.Helper() + + cwd := t.TempDir() + archivePath := writeArchive(t, cwd, "dot.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "issues.json", + Mode: 0o644, + Size: int64(len("{}")), + }, { + Name: "nested", + Mode: 0o755, + Typeflag: tar.TypeDir, + }, { + Name: "nested/payload.txt", + Mode: 0o644, + Size: int64(len("payload")), + }}, map[string][]byte{ + "issues.json": []byte("{}"), + "nested/payload.txt": []byte("payload"), + })) + t.Chdir(cwd) + + returnedDir, err := UnTar(archivePath, destDir) + if err != nil { + t.Fatalf("UnTar failed: %v", err) + } + if returnedDir != destDir { + t.Fatalf("expected returned dir %q, got %q", destDir, returnedDir) + } + if got, err := os.ReadFile(filepath.Join(cwd, "issues.json")); err != nil || string(got) != "{}" { + t.Fatalf("expected issues.json in cwd, got %q err=%v", got, err) + } + if got, err := os.ReadFile(filepath.Join(cwd, "nested", "payload.txt")); err != nil || string(got) != "payload" { + t.Fatalf("expected nested payload in cwd, got %q err=%v", got, err) + } +} + +func TestUnTar_PreExistingSymlinkInDest(t *testing.T) { + baseDir := t.TempDir() + destDir := filepath.Join(baseDir, "dest") + outsideDir := filepath.Join(baseDir, "outside") + if err := os.Mkdir(destDir, 0o755); err != nil { + t.Fatalf("failed to create dest dir: %v", err) + } + if err := os.Mkdir(outsideDir, 0o755); err != nil { + t.Fatalf("failed to create outside dir: %v", err) + } + if err := os.Symlink(outsideDir, filepath.Join(destDir, "link")); err != nil { + t.Fatalf("failed to create symlink: %v", err) + } + archivePath := writeArchive(t, baseDir, "symlink-escape.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "link/payload.txt", + Mode: 0o644, + Size: int64(len("escaped")), + }}, map[string][]byte{"link/payload.txt": []byte("escaped")})) + + if _, err := UnTar(archivePath, destDir); err == nil { + t.Fatalf("expected UnTar to reject pre-existing symlink in dest") + } + if _, err := os.Stat(filepath.Join(outsideDir, "payload.txt")); !os.IsNotExist(err) { + t.Fatalf("expected outside payload not to exist, stat err=%v", err) + } +} + +func TestUnTar_DestDirIsSymlink(t *testing.T) { + baseDir := t.TempDir() + targetDir := filepath.Join(baseDir, "target") + destDir := filepath.Join(baseDir, "dest") + if err := os.Mkdir(targetDir, 0o755); err != nil { + t.Fatalf("failed to create target dir: %v", err) + } + if err := os.Symlink(targetDir, destDir); err != nil { + t.Fatalf("failed to create dest symlink: %v", err) + } + archivePath := writeArchive(t, baseDir, "dest-symlink.tar.gz", buildTarGz(t, []tar.Header{{ + Name: "payload.txt", + Mode: 0o644, + Size: int64(len("payload")), + }}, map[string][]byte{"payload.txt": []byte("payload")})) + + if _, err := UnTar(archivePath, destDir); err != nil { + t.Fatalf("UnTar failed: %v", err) + } + if got, err := os.ReadFile(filepath.Join(targetDir, "payload.txt")); err != nil || string(got) != "payload" { + t.Fatalf("expected payload in symlink target, got %q err=%v", got, err) + } +} + +func TestReTarDir_RoundTrip(t *testing.T) { + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("top-level"), 0o644); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + if err := os.Mkdir(filepath.Join(srcDir, "nested"), 0o755); err != nil { + t.Fatalf("failed to create nested dir: %v", err) + } + if err := os.WriteFile(filepath.Join(srcDir, "nested", "inner.txt"), []byte("nested content"), 0o644); err != nil { + t.Fatalf("failed to write nested file: %v", err) + } + + archivePath := filepath.Join(t.TempDir(), "archive.tar.gz") + if err := ReTarDir(srcDir, archivePath); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + destDir := filepath.Join(t.TempDir(), "extract") + if _, err := UnTar(archivePath, destDir); err != nil { + t.Fatalf("UnTar failed: %v", err) + } + + wantListing := []string{"file.txt", "nested/", "nested/inner.txt"} + if gotListing := collectListing(t, destDir); !reflect.DeepEqual(gotListing, wantListing) { + t.Fatalf("listing mismatch: got %v, want %v", gotListing, wantListing) + } + for _, rel := range []string{"file.txt", filepath.Join("nested", "inner.txt")} { + want, err := os.ReadFile(filepath.Join(srcDir, rel)) + if err != nil { + t.Fatalf("failed to read source %q: %v", rel, err) + } + got, err := os.ReadFile(filepath.Join(destDir, rel)) + if err != nil { + t.Fatalf("failed to read extracted %q: %v", rel, err) + } + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch for %q", rel) + } + } +} + +func TestReTarDir_OutputInsideSrcDir(t *testing.T) { + srcDir := t.TempDir() + outPath := filepath.Join(srcDir, "out.tar.gz") + + err := ReTarDir(srcDir, outPath) + if err == nil || !strings.Contains(err.Error(), "must not be inside source directory") { + t.Fatalf("expected output-inside-source error, got %v", err) + } + if _, statErr := os.Stat(outPath); !os.IsNotExist(statErr) { + t.Fatalf("expected archive not to be created, stat err=%v", statErr) + } +} + +func TestReTarDir_OutputEqualsSrcDir(t *testing.T) { + srcDir := t.TempDir() + + err := ReTarDir(srcDir, srcDir) + if err == nil || !strings.Contains(err.Error(), "must not be inside source directory") { + t.Fatalf("expected output-equals-source error, got %v", err) + } +} + +func TestReTarDir_LegacyLayout(t *testing.T) { + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "a.txt"), []byte("a"), 0o644); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + if err := os.Mkdir(filepath.Join(srcDir, "sub"), 0o755); err != nil { + t.Fatalf("failed to create subdir: %v", err) + } + if err := os.WriteFile(filepath.Join(srcDir, "sub", "b.txt"), []byte("b"), 0o644); err != nil { + t.Fatalf("failed to write nested source file: %v", err) + } + + archivePath := filepath.Join(t.TempDir(), "archive.tar.gz") + if err := ReTarDir(srcDir, archivePath); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + + archiveFile, err := os.Open(archivePath) + if err != nil { + t.Fatalf("failed to open archive: %v", err) + } + defer archiveFile.Close() + gzipReader, err := gzip.NewReader(archiveFile) + if err != nil { + t.Fatalf("failed to create gzip reader: %v", err) + } + defer gzipReader.Close() + tarReader := tar.NewReader(gzipReader) + + var headers []tar.Header + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to read tar header: %v", err) + } + headers = append(headers, *header) + } + if len(headers) == 0 { + t.Fatalf("expected archive entries") + } + if headers[0].Name != "./" || headers[0].Typeflag != tar.TypeDir { + t.Fatalf("first entry mismatch: name=%q type=%d", headers[0].Name, headers[0].Typeflag) + } + + foundSubDir := false + for _, header := range headers[1:] { + if !strings.HasPrefix(header.Name, "./") { + t.Fatalf("entry %q missing ./ prefix", header.Name) + } + if header.Name == "./sub/" { + foundSubDir = true + } + } + if !foundSubDir { + t.Fatalf("expected ./sub/ directory entry with trailing slash, got %v", headers) + } +} + +func TestReTarDir_ParentDirCreated(t *testing.T) { + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("content"), 0o644); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + outPath := filepath.Join(t.TempDir(), "does", "not", "exist", "out.tar.gz") + + if err := ReTarDir(srcDir, outPath); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + if _, err := os.Stat(outPath); err != nil { + t.Fatalf("expected archive to exist: %v", err) + } +} + +func TestReTarDir_SrcMissing(t *testing.T) { + outPath := filepath.Join(t.TempDir(), "out.tar.gz") + + if err := ReTarDir(filepath.Join(t.TempDir(), "missing"), outPath); err == nil { + t.Fatalf("expected error for missing source directory") + } +} + +func TestReTarDir_Overwrites(t *testing.T) { + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("content"), 0o644); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + outPath := filepath.Join(t.TempDir(), "out.tar.gz") + if err := os.WriteFile(outPath, []byte("garbage"), 0o644); err != nil { + t.Fatalf("failed to pre-create archive: %v", err) + } + + if err := ReTarDir(srcDir, outPath); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + if _, err := UnTar(outPath, filepath.Join(t.TempDir(), "extract")); err != nil { + t.Fatalf("expected overwritten archive to extract cleanly: %v", err) + } +} + +func TestReTar_LegacyNamingInCWD(t *testing.T) { + cwd := t.TempDir() + oldCWD, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current directory: %v", err) + } + if err := os.Chdir(cwd); err != nil { + t.Fatalf("failed to change directory: %v", err) + } + t.Cleanup(func() { + if err := os.Chdir(oldCWD); err != nil { + t.Fatalf("failed to restore current directory: %v", err) + } + }) + srcDir := filepath.Join(t.TempDir(), "source-dir") + if err := os.Mkdir(srcDir, 0o755); err != nil { + t.Fatalf("failed to create source dir: %v", err) + } + if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("content"), 0o644); err != nil { + t.Fatalf("failed to write source file: %v", err) + } + + archiveName, err := ReTar(srcDir) + if err != nil { + t.Fatalf("ReTar failed: %v", err) + } + wantName := filepath.Base(srcDir) + "-REMAPPED.tar.gz" + if archiveName != wantName { + t.Fatalf("expected archive name %q, got %q", wantName, archiveName) + } + if _, err := os.Stat(filepath.Join(cwd, wantName)); err != nil { + t.Fatalf("expected archive in cwd: %v", err) + } +} diff --git a/pkg/commitremap/commitremap.go b/pkg/commitremap/commitremap.go new file mode 100644 index 0000000..88826af --- /dev/null +++ b/pkg/commitremap/commitremap.go @@ -0,0 +1,133 @@ +package commitremap + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" +) + +// DefaultPrefixes returns the set of archive metadata file prefixes that +// gh-commit-remap rewrites by default. A fresh slice is returned on each +// call so callers can mutate the result without affecting other callers. +func DefaultPrefixes() []string { + return []string{"pull_requests", "issues", "issue_events"} +} + +type invalidCommitMapLineError struct { + line string + fields int +} + +func (e invalidCommitMapLineError) Error() string { + return fmt.Sprintf("line %q has %d fields", e.line, e.fields) +} + +// ParseCommitMap parses a commit-map file into a map of old to new SHAs. +// +// The file must contain one "old new" pair per line, matching the format git +// filter-repo emits. Whitespace-only lines are skipped, CRLF line endings are +// tolerated, and duplicate old SHAs are resolved with the last entry winning. +func ParseCommitMap(filePath string) (map[string]string, error) { + commitMap := make(map[string]string) + + content, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("reading commit map %s: %w", filePath, err) + } + + for _, line := range strings.Split(string(content), "\n") { + if strings.TrimSpace(line) == "" { + continue + } + + fields := strings.Fields(line) + if len(fields) != 2 { + lineErr := invalidCommitMapLineError{line: strings.TrimRight(line, "\r"), fields: len(fields)} + return nil, fmt.Errorf("invalid commit map line: %w", lineErr) + } + + commitMap[fields[0]] = fields[1] + } + + return commitMap, nil +} + +// ProcessFiles rewrites SHAs in JSON metadata files matching _*.json inside archiveDir. +// +// Each file is walked once, replacing string values that exactly match a key in +// commitMap. Only whole-string SHA values are replaced. SHAs embedded in URLs, +// markdown, or composite strings are not rewritten. +func ProcessFiles(archiveDir string, prefixes []string, commitMap map[string]string) error { + for _, prefix := range prefixes { + pattern := filepath.Join(archiveDir, prefix+"_*.json") + files, err := filepath.Glob(pattern) + if err != nil { + return fmt.Errorf("globbing %s: %w", pattern, err) + } + + for _, file := range files { + err := updateMetadataFile(file, commitMap) + if err != nil { + return fmt.Errorf("updating metadata file %s: %w", file, err) + } + } + } + + return nil +} + +func updateMetadataFile(filePath string, commitMap map[string]string) error { + data, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("reading data: %w", err) + } + + var dataMap interface{} + err = json.Unmarshal(data, &dataMap) + if err != nil { + return fmt.Errorf("unmarshaling data: %w", err) + } + + replaceSHA(dataMap, commitMap) + + updatedData, err := json.MarshalIndent(dataMap, "", " ") + if err != nil { + return fmt.Errorf("marshaling updated data: %w", err) + } + + err = os.WriteFile(filePath, updatedData, 0644) + if err != nil { + return fmt.Errorf("writing updated data: %w", err) + } + + return nil +} + +func replaceSHA(data interface{}, commitMap map[string]string) { + switch v := data.(type) { + case map[string]interface{}: + for key, value := range v { + if str, ok := value.(string); ok { + if newSHA, hit := commitMap[str]; hit { + v[key] = newSHA + } + continue + } + + replaceSHA(value, commitMap) + } + case []interface{}: + for i, value := range v { + if str, ok := value.(string); ok { + if newSHA, hit := commitMap[str]; hit { + v[i] = newSHA + } + continue + } + + replaceSHA(value, commitMap) + } + } +} diff --git a/pkg/commitremap/commitremap_test.go b/pkg/commitremap/commitremap_test.go new file mode 100644 index 0000000..e318092 --- /dev/null +++ b/pkg/commitremap/commitremap_test.go @@ -0,0 +1,292 @@ +package commitremap + +import ( + "encoding/json" + "errors" + "os" + "path/filepath" + "reflect" + "strings" + "testing" +) + +func TestParseCommitMap(t *testing.T) { + tests := []struct { + name string + content string + expected map[string]string + errContains string + }{ + { + name: "happy path", + content: "oldSHA1 newSHA1\n" + + "oldSHA2 newSHA2\n" + + "oldSHA3 newSHA3", + expected: map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", + }, + }, + { + name: "empty file", + content: "", + expected: map[string]string{}, + }, + { + name: "whitespace-only lines mid-file", + content: "oldSHA1 newSHA1\n" + + " \t \n" + + "oldSHA2 newSHA2", + expected: map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + }, + }, + { + name: "CRLF and mixed line endings", + content: "oldSHA1 newSHA1\r\n" + + "oldSHA2 newSHA2\n" + + "oldSHA3 newSHA3\r\n", + expected: map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", + }, + }, + { + name: "trailing newline", + content: "oldSHA1 newSHA1\n" + + "oldSHA2 newSHA2\n", + expected: map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + }, + }, + { + name: "blank lines mid-file", + content: "oldSHA1 newSHA1\n" + + "\n" + + "oldSHA2 newSHA2", + expected: map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + }, + }, + { + name: "duplicate old SHA last entry wins", + content: "oldSHA1 newSHA1\n" + + "oldSHA1 newerSHA1", + expected: map[string]string{ + "oldSHA1": "newerSHA1", + }, + }, + { + name: "malformed line with one field", + content: "oldSHA1 newSHA1\noffendingLine\noldSHA2 newSHA2", + errContains: "offendingLine", + }, + { + name: "malformed line with three fields", + content: "oldSHA1 newSHA1\noldSHA2 newSHA2 extra\noldSHA3 newSHA3", + errContains: "oldSHA2 newSHA2 extra", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filePath := filepath.Join(t.TempDir(), "commit-map") + if err := os.WriteFile(filePath, []byte(tt.content), 0644); err != nil { + t.Fatalf("write commit map fixture: %v", err) + } + + result, err := ParseCommitMap(filePath) + if tt.errContains != "" { + if err == nil { + t.Fatalf("expected error containing %q", tt.errContains) + } + if !strings.Contains(err.Error(), tt.errContains) { + t.Fatalf("expected error %q to contain %q", err.Error(), tt.errContains) + } + + var lineErr invalidCommitMapLineError + if !errors.As(err, &lineErr) { + t.Fatalf("expected error to wrap invalidCommitMapLineError, got %T: %v", err, err) + } + if lineErr.line != tt.errContains { + t.Fatalf("wrapped line = %q, want %q", lineErr.line, tt.errContains) + } + return + } + + if err != nil { + t.Fatalf("ParseCommitMap returned error: %v", err) + } + if result == nil { + t.Fatal("ParseCommitMap returned nil map") + } + if !reflect.DeepEqual(result, tt.expected) { + t.Fatalf("ParseCommitMap returned %#v, want %#v", result, tt.expected) + } + }) + } + + t.Run("nonexistent file", func(t *testing.T) { + _, err := ParseCommitMap(filepath.Join(t.TempDir(), "missing-commit-map")) + if err == nil { + t.Fatal("expected error for nonexistent file") + } + }) +} + +func TestProcessFiles(t *testing.T) { + t.Run("happy path", func(t *testing.T) { + dir := t.TempDir() + commitMap := map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", + } + + fixtures := map[string]struct { + input string + want string + }{ + "pull_requests_000001.json": { + input: `{"sha":"oldSHA1","url":"https://example.invalid/oldSHA1","nested":[{"head":"oldSHA2","body":"mention oldSHA3"}],"untouched":"keep"}`, + want: `{"sha":"newSHA1","url":"https://example.invalid/oldSHA1","nested":[{"head":"newSHA2","body":"mention oldSHA3"}],"untouched":"keep"}`, + }, + "issues_000001.json": { + input: `[{"events":[{"commit_id":"oldSHA2"},{"commit_id":"unknownSHA"}],"title":"oldSHA2 in title"}]`, + want: `[{"events":[{"commit_id":"newSHA2"},{"commit_id":"unknownSHA"}],"title":"oldSHA2 in title"}]`, + }, + "issue_events_000001.json": { + input: `{"items":[{"payload":{"before":"oldSHA3","after":"oldSHA1"}}],"count":1}`, + want: `{"items":[{"payload":{"before":"newSHA3","after":"newSHA1"}}],"count":1}`, + }, + } + + for name, fixture := range fixtures { + writeFile(t, filepath.Join(dir, name), fixture.input) + } + + if err := ProcessFiles(dir, DefaultPrefixes(), commitMap); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + for name, fixture := range fixtures { + assertJSONFileEqual(t, filepath.Join(dir, name), fixture.want) + } + }) + + t.Run("empty commit map", func(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "pull_requests_000001.json") + want := `{"sha":"oldSHA1","nested":[{"sha":"oldSHA2"}]}` + writeFile(t, filePath, want) + + if err := ProcessFiles(dir, []string{"pull_requests"}, map[string]string{}); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + assertJSONFileEqual(t, filePath, want) + }) + + t.Run("no matching files", func(t *testing.T) { + if err := ProcessFiles(t.TempDir(), DefaultPrefixes(), map[string]string{"oldSHA1": "newSHA1"}); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + }) + + t.Run("custom prefixes", func(t *testing.T) { + dir := t.TempDir() + fooPath := filepath.Join(dir, "foo_000001.json") + pullPath := filepath.Join(dir, "pull_requests_000001.json") + writeFile(t, fooPath, `{"sha":"oldSHA1"}`) + writeFile(t, pullPath, `{"sha":"oldSHA1"}`) + + if err := ProcessFiles(dir, []string{"foo"}, map[string]string{"oldSHA1": "newSHA1"}); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + assertJSONFileEqual(t, fooPath, `{"sha":"newSHA1"}`) + assertJSONFileEqual(t, pullPath, `{"sha":"oldSHA1"}`) + }) + + t.Run("single-pass behavior remaps all keys", func(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "pull_requests_000001.json") + writeFile(t, filePath, `{"items":[{"sha":"oldSHA1"},{"nested":{"sha":"oldSHA2","children":["oldSHA3","oldSHA4"]}}]}`) + + commitMap := map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", + "oldSHA4": "newSHA4", + } + if err := ProcessFiles(dir, []string{"pull_requests"}, commitMap); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + assertJSONFileEqual(t, filePath, `{"items":[{"sha":"newSHA1"},{"nested":{"sha":"newSHA2","children":["newSHA3","newSHA4"]}}]}`) + }) + + t.Run("non matching strings unchanged", func(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "issues_000001.json") + original := `{"title":"no SHA here","body":"https://example.invalid/oldSHA1","labels":["bug","help wanted"]}` + writeFile(t, filePath, original) + + if err := ProcessFiles(dir, []string{"issues"}, map[string]string{"oldSHA1": "newSHA1"}); err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + assertJSONFileEqual(t, filePath, original) + }) +} + +func TestDefaultPrefixes(t *testing.T) { + want := []string{"pull_requests", "issues", "issue_events"} + got := DefaultPrefixes() + if !reflect.DeepEqual(got, want) { + t.Fatalf("DefaultPrefixes() = %#v, want %#v", got, want) + } + + // Mutating the returned slice must not affect later calls. + got[0] = "mutated" + fresh := DefaultPrefixes() + if !reflect.DeepEqual(fresh, want) { + t.Fatalf("DefaultPrefixes() returned shared state; got %#v after caller mutation", fresh) + } +} + +func writeFile(t *testing.T, path string, content string) { + t.Helper() + + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("write fixture %s: %v", path, err) + } +} + +func assertJSONFileEqual(t *testing.T, path string, want string) { + t.Helper() + + gotBytes, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + + var gotJSON interface{} + if err := json.Unmarshal(gotBytes, &gotJSON); err != nil { + t.Fatalf("unmarshal got JSON from %s: %v\n%s", path, err, string(gotBytes)) + } + + var wantJSON interface{} + if err := json.Unmarshal([]byte(want), &wantJSON); err != nil { + t.Fatalf("unmarshal want JSON: %v\n%s", err, want) + } + + if !reflect.DeepEqual(gotJSON, wantJSON) { + t.Fatalf("JSON in %s = %#v, want %#v", path, gotJSON, wantJSON) + } +} From c116c65b575390e28143e1951b94ea01292c9878 Mon Sep 17 00:00:00 2001 From: Alejandro Menocal Date: Tue, 28 Apr 2026 18:14:35 -0500 Subject: [PATCH 2/4] build uses go.mod instead of version add codeowners file bump to correct Go version --- .github/CODEOWNERS | 1 + .github/workflows/build.yml | 2 +- go.mod | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..58383b9 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @amenocal \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8de63f8..440a4a5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b #v4.1.4 - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 #v5.0.1 with: - go-version: 1.21 + go-version-file: go.mod - run: go get -v -t -d ./... - run: go build -v . - run: go test ./... --coverprofile=cover.out \ No newline at end of file diff --git a/go.mod b/go.mod index e7cf800..669708f 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/mona-actions/gh-commit-remap -go 1.24 +go 1.25 require github.com/spf13/cobra v1.8.1 From f5d58b3bf3e7fb6428a078a60efb9afdadc3e196 Mon Sep 17 00:00:00 2001 From: Alejandro Menocal Date: Wed, 29 Apr 2026 10:54:34 -0500 Subject: [PATCH 3/4] apply copilot suggestions --- README.md | 2 +- cmd/root.go | 9 ++- pkg/archive/archive.go | 80 +++++++------------ pkg/archive/archive_test.go | 151 ++++++++++++++++++++++++++++-------- 4 files changed, 152 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index fb260b7..8906aa0 100644 --- a/README.md +++ b/README.md @@ -90,4 +90,4 @@ func main() { (`commit_comments_*.json`, `pull_request_review*_*.json`, `releases_*.json`'s `target_commitish`, etc.) are not processed by default. Pass a custom `prefixes` slice to `ProcessFiles` to widen coverage, or - track the follow-up issue for a default-list expansion. \ No newline at end of file + track the follow-up issue for a default-list expansion. \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index ae0628e..16bbef4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -6,6 +6,7 @@ package cmd import ( "log" "os" + "path/filepath" "strings" "github.com/mona-actions/gh-commit-remap/pkg/archive" @@ -53,9 +54,11 @@ var rootCmd = &cobra.Command{ if err := commitremap.ProcessFiles(extractedDir, commitremap.DefaultPrefixes(), commitMap); err != nil { log.Fatal(err) } else { - // Re-package the modified directory into a new archive - tarPath, err := archive.ReTar(extractedDir) - if err != nil { + // Re-package the modified directory into a new archive in the + // current working directory using the legacy "-REMAPPED.tar.gz" + // naming convention. + tarPath := filepath.Base(extractedDir) + "-REMAPPED.tar.gz" + if err := archive.ReTarDir(extractedDir, tarPath); err != nil { log.Fatal(err) } log.Printf("New archive created: %s", tarPath) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index ab82d4c..baa2f9c 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -11,18 +11,7 @@ import ( "strings" ) -// UnTar decompresses a .tar.gz file into destDir, returning the directory -// containing the extracted contents. If destDir is empty, a directory named -// after the archive (with .tar.gz or .tgz stripped) is created in the current -// working directory — note that this default-naming behavior is asymmetric -// with ReTarDir, which always requires an explicit outPath. destDir is -// created if it does not exist, and empty, ".", and relative destination paths -// are accepted. Extraction is sandboxed with os.Root: a destination path that -// is itself a symlink is allowed as the sandbox root, but pre-existing symlinks -// inside the destination tree are rejected as security boundary violations. -// UnTar returns an error if the archive cannot be opened, is not valid gzip/tar -// data, contains entries that escape destDir or use absolute paths, or contains -// unsupported entry types such as symlinks, devices, FIFOs, or hard links. +// UnTar decompresses a .tar.gz file into destDir, returning the directory containing the extracted contents. func UnTar(archiveFile, destDir string) (string, error) { if archiveFile == "" { return "", fmt.Errorf("archive file path is empty") @@ -127,22 +116,7 @@ func pathHasParentRef(name string) bool { } // ReTarDir creates a .tar.gz archive at outPath from the contents of srcDir. -// The parent directory for outPath is created if it does not exist, and an -// existing outPath is overwritten. outPath must not resolve inside srcDir; an -// error is returned otherwise. -// -// Archive entries use a "./" prefix and a top-level "./" directory entry to -// match the byte-layout of the legacy `tar -czf {archive} -C {srcDir} .` shell -// command. This parity exists so consumers that compare archives byte-for-byte -// (e.g. CI pipelines diffing against archives produced by the previous -// shell-based implementation, or downstream importers like the GEI migration -// importer that historically consumed shell-tar output) keep working without -// modification. Standards-compliant tar readers treat "./foo" and "foo" -// identically; the prefix is purely about layout parity, not semantics. -// -// ReTarDir returns an error if srcDir cannot be read, is not a directory, -// outPath cannot be created, or any file cannot be written to the archive. -func ReTarDir(srcDir, outPath string) error { +func ReTarDir(srcDir, outPath string) (retErr error) { info, err := os.Stat(srcDir) if err != nil { return fmt.Errorf("cannot stat source directory: %w", err) @@ -173,15 +147,26 @@ func ReTarDir(srcDir, outPath string) error { if err != nil { return fmt.Errorf("failed to create archive: %w", err) } - defer func() { _ = outFile.Close() }() - gzipWriter := gzip.NewWriter(outFile) tarWriter := tar.NewWriter(gzipWriter) - // Match legacy `tar -czf -C srcDir .` layout: emit a top-level "./" directory - // entry. We reuse tar.FileInfoHeader (same path used for child entries) so the - // root header is populated consistently (Uid/Gid/AccessTime/Format etc.) rather - // than being a hand-rolled subset. + // The success path closes each writer explicitly (in tar -> gzip -> file order) + var tarClosed, gzipClosed, fileClosed bool + defer func() { + if !tarClosed { + _ = tarWriter.Close() + } + if !gzipClosed { + _ = gzipWriter.Close() + } + if !fileClosed { + _ = outFile.Close() + } + if retErr != nil { + _ = os.Remove(outPath) + } + }() + rootHeader, err := tar.FileInfoHeader(info, "") if err != nil { return fmt.Errorf("failed to create root tar header: %w", err) @@ -198,10 +183,7 @@ func ReTarDir(srcDir, outPath string) error { if path == srcDir { return nil } - // outPath is guaranteed to be outside srcDir by the up-front guard above, - // so filepath.WalkDir(srcDir, …) cannot yield it. The guard plus - // TestReTarDir_OutputInsideSrcDir / TestReTarDir_OutputEqualsSrcDir are the - // regression tripwire — no in-walk skip needed. + // Skip the output archive file if it somehow appears in the walk (defensive; guarded above). info, err := entry.Info() if err != nil { @@ -214,6 +196,11 @@ func ReTarDir(srcDir, outPath string) error { } tarPath := filepath.ToSlash(relPath) + // Reject special files (symlinks, devices, FIFOs, sockets) + if !info.IsDir() && !info.Mode().IsRegular() { + return fmt.Errorf("unsupported file type for %q: only regular files and directories are archivable", path) + } + header, err := tar.FileInfoHeader(info, "") if err != nil { return fmt.Errorf("failed to create tar header for %q: %w", path, err) @@ -252,26 +239,15 @@ func ReTarDir(srcDir, outPath string) error { if err := tarWriter.Close(); err != nil { return fmt.Errorf("failed to close tar writer: %w", err) } + tarClosed = true if err := gzipWriter.Close(); err != nil { return fmt.Errorf("failed to close gzip writer: %w", err) } + gzipClosed = true if err := outFile.Close(); err != nil { return fmt.Errorf("failed to close archive: %w", err) } + fileClosed = true return nil } - -// ReTar preserves the legacy CLI behavior by writing -// -REMAPPED.tar.gz into the current working directory and -// returning that archive name. It returns an error if ReTarDir cannot create -// the archive. -// -// Deprecated: prefer ReTarDir. Will be removed in a future major version. -func ReTar(srcDir string) (string, error) { - archiveName := filepath.Base(srcDir) + "-REMAPPED.tar.gz" - if err := ReTarDir(srcDir, archiveName); err != nil { - return "", fmt.Errorf("error re-tarring the files: %w", err) - } - return archiveName, nil -} diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index 437cc5c..cafc253 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "reflect" + "runtime" "sort" "strings" "testing" @@ -86,7 +87,7 @@ func collectListing(t *testing.T, root string) []string { return entries } -func TestReTar(t *testing.T) { +func TestReTarDir_BasicRoundTrip(t *testing.T) { srcDir := t.TempDir() origContent := []byte("test") @@ -94,11 +95,10 @@ func TestReTar(t *testing.T) { t.Fatalf("failed to write temp file: %v", err) } - tarFile, err := ReTar(srcDir) - if err != nil { - t.Fatalf("ReTar failed: %v", err) + tarFile := filepath.Join(t.TempDir(), "out.tar.gz") + if err := ReTarDir(srcDir, tarFile); err != nil { + t.Fatalf("ReTarDir failed: %v", err) } - defer os.Remove(tarFile) if _, err := os.Stat(tarFile); err != nil { t.Fatalf("tar file was not created: %v", err) @@ -127,11 +127,10 @@ func TestUnTar(t *testing.T) { t.Fatalf("failed to write file: %v", err) } - archiveName, err := ReTar(srcDir) - if err != nil { - t.Fatalf("ReTar failed: %v", err) + archiveName := filepath.Join(t.TempDir(), "out.tar.gz") + if err := ReTarDir(srcDir, archiveName); err != nil { + t.Fatalf("ReTarDir failed: %v", err) } - defer os.Remove(archiveName) destDir, err := UnTar(archiveName, "") if err != nil { @@ -536,37 +535,121 @@ func TestReTarDir_Overwrites(t *testing.T) { } } -func TestReTar_LegacyNamingInCWD(t *testing.T) { - cwd := t.TempDir() - oldCWD, err := os.Getwd() - if err != nil { - t.Fatalf("failed to get current directory: %v", err) +func TestReTarDir_RemovesPartialArchiveOnError(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("chmod-based unreadable-dir setup is unreliable on Windows") } - if err := os.Chdir(cwd); err != nil { - t.Fatalf("failed to change directory: %v", err) + if os.Geteuid() == 0 { + t.Skip("running as root bypasses chmod restrictions") } - t.Cleanup(func() { - if err := os.Chdir(oldCWD); err != nil { - t.Fatalf("failed to restore current directory: %v", err) - } - }) - srcDir := filepath.Join(t.TempDir(), "source-dir") - if err := os.Mkdir(srcDir, 0o755); err != nil { - t.Fatalf("failed to create source dir: %v", err) + + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "ok.txt"), []byte("hi"), 0o644); err != nil { + t.Fatalf("failed to write ok file: %v", err) } - if err := os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("content"), 0o644); err != nil { - t.Fatalf("failed to write source file: %v", err) + badDir := filepath.Join(srcDir, "unreadable") + if err := os.Mkdir(badDir, 0o755); err != nil { + t.Fatalf("failed to create subdir: %v", err) + } + if err := os.WriteFile(filepath.Join(badDir, "child.txt"), []byte("x"), 0o644); err != nil { + t.Fatalf("failed to write child file: %v", err) + } + if err := os.Chmod(badDir, 0); err != nil { + t.Fatalf("failed to chmod 0 subdir: %v", err) } + t.Cleanup(func() { _ = os.Chmod(badDir, 0o755) }) - archiveName, err := ReTar(srcDir) - if err != nil { - t.Fatalf("ReTar failed: %v", err) + outPath := filepath.Join(t.TempDir(), "archive.tar.gz") + err := ReTarDir(srcDir, outPath) + if err == nil { + t.Fatalf("expected ReTarDir to fail on unreadable subdir, got nil") } - wantName := filepath.Base(srcDir) + "-REMAPPED.tar.gz" - if archiveName != wantName { - t.Fatalf("expected archive name %q, got %q", wantName, archiveName) + if _, statErr := os.Stat(outPath); !os.IsNotExist(statErr) { + t.Fatalf("expected partial archive removed, stat err=%v", statErr) } - if _, err := os.Stat(filepath.Join(cwd, wantName)); err != nil { - t.Fatalf("expected archive in cwd: %v", err) +} + +func TestReTarDir_RejectsSymlink(t *testing.T) { + // This is the only test exercising C3's special-file rejection path + // (TestUnTar_ReTarDir_RoundTrip uses a regular-file/dir-only fixture). + // Windows is intentionally uncovered here because os.Symlink there + // requires elevated privileges; the rejection logic itself is + // platform-agnostic. + if runtime.GOOS == "windows" { + t.Skip("symlink creation requires elevated privileges on Windows") + } + + srcDir := t.TempDir() + if err := os.WriteFile(filepath.Join(srcDir, "real.txt"), []byte("real"), 0o644); err != nil { + t.Fatalf("failed to write real file: %v", err) + } + if err := os.Symlink("real.txt", filepath.Join(srcDir, "link.txt")); err != nil { + t.Fatalf("failed to create symlink: %v", err) + } + + outPath := filepath.Join(t.TempDir(), "archive.tar.gz") + err := ReTarDir(srcDir, outPath) + if err == nil || !strings.Contains(err.Error(), "unsupported file type") { + t.Fatalf("expected unsupported-file-type error, got %v", err) + } + if _, statErr := os.Stat(outPath); !os.IsNotExist(statErr) { + t.Fatalf("expected partial archive removed, stat err=%v", statErr) + } +} + +func TestUnTar_ReTarDir_RoundTrip(t *testing.T) { + // Start from a deterministic in-test fixture, run the full + // UnTar -> ReTarDir -> UnTar loop, and assert the file tree and + // byte contents survive the round trip. This is the symmetry + // tripwire between the two functions: any future drift (a tar type + // one side accepts but the other doesn't) trips this test. + headers := []tar.Header{ + {Name: "top.txt", Mode: 0o644}, + {Name: "nested/", Typeflag: tar.TypeDir, Mode: 0o755}, + {Name: "nested/inner.txt", Mode: 0o644}, + {Name: "nested/deep/", Typeflag: tar.TypeDir, Mode: 0o755}, + {Name: "nested/deep/leaf.txt", Mode: 0o644}, + } + contents := map[string][]byte{ + "top.txt": []byte("top-level content"), + "nested/inner.txt": []byte("inner content"), + "nested/deep/leaf.txt": []byte("leaf content"), + } + archiveBytes := buildTarGz(t, headers, contents) + srcArchive := filepath.Join(t.TempDir(), "src.tar.gz") + if err := os.WriteFile(srcArchive, archiveBytes, 0o644); err != nil { + t.Fatalf("failed to write source archive: %v", err) + } + + firstExtract := filepath.Join(t.TempDir(), "first") + if _, err := UnTar(srcArchive, firstExtract); err != nil { + t.Fatalf("first UnTar failed: %v", err) + } + + repacked := filepath.Join(t.TempDir(), "repacked.tar.gz") + if err := ReTarDir(firstExtract, repacked); err != nil { + t.Fatalf("ReTarDir failed: %v", err) + } + + secondExtract := filepath.Join(t.TempDir(), "second") + if _, err := UnTar(repacked, secondExtract); err != nil { + t.Fatalf("second UnTar failed: %v", err) + } + + wantListing := []string{"nested/", "nested/deep/", "nested/deep/leaf.txt", "nested/inner.txt", "top.txt"} + gotListing := collectListing(t, secondExtract) + sort.Strings(gotListing) + sort.Strings(wantListing) + if !reflect.DeepEqual(gotListing, wantListing) { + t.Fatalf("round-trip listing mismatch: got %v, want %v", gotListing, wantListing) + } + for rel, want := range contents { + got, err := os.ReadFile(filepath.Join(secondExtract, rel)) + if err != nil { + t.Fatalf("failed to read round-tripped %q: %v", rel, err) + } + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch for %q after round trip: got %q want %q", rel, got, want) + } } } From 0f3cc5f35ce0cdfbef6485e30055d9f2b9bee967 Mon Sep 17 00:00:00 2001 From: Alejandro Menocal Date: Wed, 29 Apr 2026 14:42:18 -0500 Subject: [PATCH 4/4] adds summary of each run --- cmd/root.go | 95 ++++++++++++++------ go.mod | 17 +++- go.sum | 116 ++++++++++++++++++++++++ pkg/commitremap/commitremap.go | 71 +++++++++++---- pkg/commitremap/commitremap_test.go | 132 ++++++++++++++++++++++++++-- 5 files changed, 379 insertions(+), 52 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 16bbef4..50f9a1c 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,13 +4,15 @@ Copyright © 2024 NAME HERE package cmd import ( - "log" + "fmt" "os" "path/filepath" + "sort" "strings" "github.com/mona-actions/gh-commit-remap/pkg/archive" "github.com/mona-actions/gh-commit-remap/pkg/commitremap" + "github.com/pterm/pterm" "github.com/spf13/cobra" ) @@ -20,6 +22,38 @@ func init() { rootCmd.Flags().StringP("migration-archive", "m", "", "Path to the migration archive Example: /path/to/migration-archive.tar.gz") rootCmd.MarkFlagRequired("migration-archive") + + rootCmd.SilenceErrors = true + rootCmd.SilenceUsage = true +} + +func renderSummaryTable(stats commitremap.Stats, extractedDir string) { + if stats.FilesChanged() == 0 { + pterm.Info.Println("No files were modified — none of the SHAs in the archive matched the commit-map.") + return + } + + type row struct { + rel string + count int + } + rows := make([]row, 0, len(stats.PerFile)) + for absPath, count := range stats.PerFile { + rel, err := filepath.Rel(extractedDir, absPath) + if err != nil { + rel = filepath.Base(absPath) + } + rows = append(rows, row{rel: rel, count: count}) + } + sort.Slice(rows, func(i, j int) bool { return rows[i].rel < rows[j].rel }) + + data := pterm.TableData{{"File", "SHAs rewritten"}} + for _, r := range rows { + data = append(data, []string{r.rel, fmt.Sprintf("%d", r.count)}) + } + if err := pterm.DefaultTable.WithHasHeader().WithData(data).Render(); err != nil { + pterm.Warning.Printfln("failed to render summary table: %v", err) + } } // rootCmd represents the base command when called without any subcommands @@ -28,11 +62,11 @@ var rootCmd = &cobra.Command{ Short: "remaps commit hashes in a GitHub archive", Long: `Is a CLI tool that can remap commits hashed after performing a history re-write when performing a migration For exam`, - Run: func(cmd *cobra.Command, args []string) { + RunE: func(cmd *cobra.Command, args []string) error { mapPath, _ := cmd.Flags().GetString("mapping-file") commitMap, err := commitremap.ParseCommitMap(mapPath) if err != nil { - log.Fatalf("Error parsing commit map: %v", err) + return fmt.Errorf("parsing commit map: %w", err) } archivePath, _ := cmd.Flags().GetString("migration-archive") @@ -41,42 +75,53 @@ var rootCmd = &cobra.Command{ untarAndRetar := strings.HasSuffix(archivePath, ".tar.gz") if untarAndRetar { - // Extract the provided migration archive so we can modify its JSON contents + pterm.DefaultSection.Println("Extract") + spinner, _ := pterm.DefaultSpinner.Start("Extracting archive...") extractedDir, err = archive.UnTar(archivePath, "") if err != nil { - log.Fatalf("Error extracting migration archive: %v", err) + spinner.Fail("Extraction failed") + return fmt.Errorf("extracting migration archive: %w", err) } + defer func() { + if err := os.RemoveAll(extractedDir); err != nil { + pterm.Warning.Printfln("failed to remove extracted directory %s: %v", extractedDir, err) + } + }() + spinner.Success(fmt.Sprintf("Extracted to %s", extractedDir)) } else { - // Treat provided path as an already-extracted directory extractedDir = archivePath } - if err := commitremap.ProcessFiles(extractedDir, commitremap.DefaultPrefixes(), commitMap); err != nil { - log.Fatal(err) - } else { - // Re-package the modified directory into a new archive in the - // current working directory using the legacy "-REMAPPED.tar.gz" - // naming convention. - tarPath := filepath.Base(extractedDir) + "-REMAPPED.tar.gz" - if err := archive.ReTarDir(extractedDir, tarPath); err != nil { - log.Fatal(err) - } - log.Printf("New archive created: %s", tarPath) - if untarAndRetar { - // Cleanup extracted directory after successful re-tar - if err := os.RemoveAll(extractedDir); err != nil { - log.Printf("Warning: failed to remove extracted directory %s: %v", extractedDir, err) - } - } + pterm.DefaultSection.Println("Remap") + remapSpinner, _ := pterm.DefaultSpinner.Start("Remapping SHAs...") + stats, err := commitremap.ProcessFiles(extractedDir, commitremap.DefaultPrefixes(), commitMap) + if err != nil { + remapSpinner.Fail("Remap failed") + renderSummaryTable(stats, extractedDir) + return fmt.Errorf("remapping SHAs: %w", err) } + remapSpinner.Success(fmt.Sprintf("Remapped %d SHAs across %d files (scanned %d)", stats.TotalReplacements(), stats.FilesChanged(), stats.FilesScanned)) + renderSummaryTable(stats, extractedDir) + + pterm.DefaultSection.Println("Repack") + tarPath := filepath.Base(extractedDir) + "-REMAPPED.tar.gz" + repackSpinner, _ := pterm.DefaultSpinner.Start("Creating new archive...") + if err := archive.ReTarDir(extractedDir, tarPath); err != nil { + repackSpinner.Fail("Repack failed") + return fmt.Errorf("creating new archive: %w", err) + } + repackSpinner.Success(fmt.Sprintf("Created %s", tarPath)) + pterm.Success.Printfln("New archive created: %s", tarPath) + + return nil }, } // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute() { - err := rootCmd.Execute() - if err != nil { + if err := rootCmd.Execute(); err != nil { + pterm.Error.Println(err) os.Exit(1) } } diff --git a/go.mod b/go.mod index 669708f..1526d78 100644 --- a/go.mod +++ b/go.mod @@ -2,9 +2,24 @@ module github.com/mona-actions/gh-commit-remap go 1.25 -require github.com/spf13/cobra v1.8.1 +require ( + github.com/pterm/pterm v0.12.83 + github.com/spf13/cobra v1.8.1 +) require ( + atomicgo.dev/cursor v0.2.0 // indirect + atomicgo.dev/keyboard v0.2.9 // indirect + atomicgo.dev/schedule v0.1.0 // indirect + github.com/clipperhouse/uax29/v2 v2.7.0 // indirect + github.com/containerd/console v1.0.5 // indirect + github.com/gookit/color v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/lithammer/fuzzysearch v1.1.8 // indirect + github.com/mattn/go-runewidth v0.0.20 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect + golang.org/x/sys v0.41.0 // indirect + golang.org/x/term v0.40.0 // indirect + golang.org/x/text v0.34.0 // indirect ) diff --git a/go.sum b/go.sum index 912390a..1bdec4c 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,126 @@ +atomicgo.dev/assert v0.0.2 h1:FiKeMiZSgRrZsPo9qn/7vmr7mCsh5SZyXY4YGYiYwrg= +atomicgo.dev/assert v0.0.2/go.mod h1:ut4NcI3QDdJtlmAxQULOmA13Gz6e2DWbSAS8RUOmNYQ= +atomicgo.dev/cursor v0.2.0 h1:H6XN5alUJ52FZZUkI7AlJbUc1aW38GWZalpYRPpoPOw= +atomicgo.dev/cursor v0.2.0/go.mod h1:Lr4ZJB3U7DfPPOkbH7/6TOtJ4vFGHlgj1nc+n900IpU= +atomicgo.dev/keyboard v0.2.9 h1:tOsIid3nlPLZ3lwgG8KZMp/SFmr7P0ssEN5JUsm78K8= +atomicgo.dev/keyboard v0.2.9/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ= +atomicgo.dev/schedule v0.1.0 h1:nTthAbhZS5YZmgYbb2+DH8uQIZcTlIrd4eYr3UQxEjs= +atomicgo.dev/schedule v0.1.0/go.mod h1:xeUa3oAkiuHYh8bKiQBRojqAMq3PXXbJujjb0hw8pEU= +github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs= +github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8= +github.com/MarvinJWendt/testza v0.2.8/go.mod h1:nwIcjmr0Zz+Rcwfh3/4UhBp7ePKVhuBExvZqnKYWlII= +github.com/MarvinJWendt/testza v0.2.10/go.mod h1:pd+VWsoGUiFtq+hRKSU1Bktnn+DMCSrDrXDpX2bG66k= +github.com/MarvinJWendt/testza v0.2.12/go.mod h1:JOIegYyV7rX+7VZ9r77L/eH6CfJHHzXjB69adAhzZkI= +github.com/MarvinJWendt/testza v0.3.0/go.mod h1:eFcL4I0idjtIx8P9C6KkAuLgATNKpX4/2oUqKc6bF2c= +github.com/MarvinJWendt/testza v0.4.2/go.mod h1:mSdhXiKH8sg/gQehJ63bINcCKp7RtYewEjXsvsVUPbE= +github.com/MarvinJWendt/testza v0.5.2 h1:53KDo64C1z/h/d/stCYCPY69bt/OSwjq5KpFNwi+zB4= +github.com/MarvinJWendt/testza v0.5.2/go.mod h1:xu53QFE5sCdjtMCKk8YMQ2MnymimEctc4n3EjyIYvEY= +github.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk= +github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= +github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= +github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= +github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gookit/assert v0.1.1 h1:lh3GcawXe/p+cU7ESTZ5Ui3Sm/x8JWpIis4/1aF0mY0= +github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj0VX2E= +github.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ= +github.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo= +github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA= +github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= +github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= +github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= +github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4= +github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.20 h1:WcT52H91ZUAwy8+HUkdM3THM6gXqXuLJi9O3rjcQQaQ= +github.com/mattn/go-runewidth v0.0.20/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI= +github.com/pterm/pterm v0.12.29/go.mod h1:WI3qxgvoQFFGKGjGnJR849gU0TsEOvKn5Q8LlY1U7lg= +github.com/pterm/pterm v0.12.30/go.mod h1:MOqLIyMOgmTDz9yorcYbcw+HsgoZo3BQfg2wtl3HEFE= +github.com/pterm/pterm v0.12.31/go.mod h1:32ZAWZVXD7ZfG0s8qqHXePte42kdz8ECtRyEejaWgXU= +github.com/pterm/pterm v0.12.33/go.mod h1:x+h2uL+n7CP/rel9+bImHD5lF3nM9vJj80k9ybiiTTE= +github.com/pterm/pterm v0.12.36/go.mod h1:NjiL09hFhT/vWjQHSj1athJpx6H8cjpHXNAK5bUw8T8= +github.com/pterm/pterm v0.12.40/go.mod h1:ffwPLwlbXxP+rxT0GsgDTzS3y3rmpAO1NMjUkGTYf8s= +github.com/pterm/pterm v0.12.83 h1:ie+YmGmA727VuhxBlyGr74Ks+7McV6kT99IB8EU80aA= +github.com/pterm/pterm v0.12.83/go.mod h1:xlgc6bFWyJIMtmLJvGim+L7jhSReilOlOnodeIYe4Tk= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= +github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg= +golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/commitremap/commitremap.go b/pkg/commitremap/commitremap.go index 88826af..dd5eb01 100644 --- a/pkg/commitremap/commitremap.go +++ b/pkg/commitremap/commitremap.go @@ -1,3 +1,4 @@ +// Package commitremap rewrites SHAs inside GitHub migration archive package commitremap import ( @@ -19,16 +20,17 @@ type invalidCommitMapLineError struct { line string fields int } +type Stats struct { + FilesScanned int + PerFile map[string]int +} func (e invalidCommitMapLineError) Error() string { return fmt.Sprintf("line %q has %d fields", e.line, e.fields) } // ParseCommitMap parses a commit-map file into a map of old to new SHAs. -// -// The file must contain one "old new" pair per line, matching the format git -// filter-repo emits. Whitespace-only lines are skipped, CRLF line endings are -// tolerated, and duplicate old SHAs are resolved with the last entry winning. +// The file must contain one "old new" pair per line, matching git filter-repo format. func ParseCommitMap(filePath string) (map[string]string, error) { commitMap := make(map[string]string) @@ -59,75 +61,106 @@ func ParseCommitMap(filePath string) (map[string]string, error) { // Each file is walked once, replacing string values that exactly match a key in // commitMap. Only whole-string SHA values are replaced. SHAs embedded in URLs, // markdown, or composite strings are not rewritten. -func ProcessFiles(archiveDir string, prefixes []string, commitMap map[string]string) error { +func ProcessFiles(archiveDir string, prefixes []string, commitMap map[string]string) (Stats, error) { + stats := Stats{PerFile: make(map[string]int)} + for _, prefix := range prefixes { pattern := filepath.Join(archiveDir, prefix+"_*.json") files, err := filepath.Glob(pattern) if err != nil { - return fmt.Errorf("globbing %s: %w", pattern, err) + return stats, fmt.Errorf("globbing %s: %w", pattern, err) } for _, file := range files { - err := updateMetadataFile(file, commitMap) + stats.FilesScanned++ + n, err := updateMetadataFile(file, commitMap) if err != nil { - return fmt.Errorf("updating metadata file %s: %w", file, err) + return stats, fmt.Errorf("updating metadata file %s: %w", file, err) + } + if n > 0 { + stats.PerFile[file] = n } } } - return nil + return stats, nil } -func updateMetadataFile(filePath string, commitMap map[string]string) error { +func updateMetadataFile(filePath string, commitMap map[string]string) (int, error) { data, err := os.ReadFile(filePath) if err != nil { - return fmt.Errorf("reading data: %w", err) + return 0, fmt.Errorf("reading data: %w", err) } var dataMap interface{} err = json.Unmarshal(data, &dataMap) if err != nil { - return fmt.Errorf("unmarshaling data: %w", err) + return 0, fmt.Errorf("unmarshaling data: %w", err) } - replaceSHA(dataMap, commitMap) + count := replaceSHA(dataMap, commitMap) + if count == 0 { + return 0, nil + } updatedData, err := json.MarshalIndent(dataMap, "", " ") if err != nil { - return fmt.Errorf("marshaling updated data: %w", err) + return count, fmt.Errorf("marshaling updated data: %w", err) } err = os.WriteFile(filePath, updatedData, 0644) if err != nil { - return fmt.Errorf("writing updated data: %w", err) + return count, fmt.Errorf("writing updated data: %w", err) } - return nil + return count, nil } -func replaceSHA(data interface{}, commitMap map[string]string) { +// replaceSHA walks data in place, rewriting whole-string values that match a +// key in commitMap. It returns the number of replacements performed. +func replaceSHA(data interface{}, commitMap map[string]string) int { + count := 0 switch v := data.(type) { case map[string]interface{}: for key, value := range v { if str, ok := value.(string); ok { if newSHA, hit := commitMap[str]; hit { v[key] = newSHA + count++ } continue } - replaceSHA(value, commitMap) + count += replaceSHA(value, commitMap) } case []interface{}: for i, value := range v { if str, ok := value.(string); ok { if newSHA, hit := commitMap[str]; hit { v[i] = newSHA + count++ } continue } - replaceSHA(value, commitMap) + count += replaceSHA(value, commitMap) } } + return count +} + +// summarize the work performed by a ProcessFiles call. +// +// FilesScanned counts every metadata file inspected + +// FilesChanged returns the number of files in which at least one SHA was rewritten. +func (s Stats) FilesChanged() int { return len(s.PerFile) } + +// TotalReplacements returns the total number of SHA replacements across all files. +func (s Stats) TotalReplacements() int { + total := 0 + for _, n := range s.PerFile { + total += n + } + return total } diff --git a/pkg/commitremap/commitremap_test.go b/pkg/commitremap/commitremap_test.go index e318092..7e269a4 100644 --- a/pkg/commitremap/commitremap_test.go +++ b/pkg/commitremap/commitremap_test.go @@ -166,17 +166,39 @@ func TestProcessFiles(t *testing.T) { }, } + paths := make(map[string]string, len(fixtures)) for name, fixture := range fixtures { - writeFile(t, filepath.Join(dir, name), fixture.input) + p := filepath.Join(dir, name) + paths[name] = p + writeFile(t, p, fixture.input) } - if err := ProcessFiles(dir, DefaultPrefixes(), commitMap); err != nil { + stats, err := ProcessFiles(dir, DefaultPrefixes(), commitMap) + if err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } for name, fixture := range fixtures { assertJSONFileEqual(t, filepath.Join(dir, name), fixture.want) } + + if got, want := stats.FilesScanned, 3; got != want { + t.Fatalf("FilesScanned = %d, want %d", got, want) + } + if got, want := stats.FilesChanged(), 3; got != want { + t.Fatalf("FilesChanged() = %d, want %d", got, want) + } + // pull_requests: sha=oldSHA1, nested[0].head=oldSHA2 -> 2 + // issues: events[0].commit_id=oldSHA2 -> 1 + // issue_events: payload.before=oldSHA3, payload.after=oldSHA1 -> 2 + if got, want := stats.TotalReplacements(), 5; got != want { + t.Fatalf("TotalReplacements() = %d, want %d", got, want) + } + for name := range fixtures { + if n := stats.PerFile[paths[name]]; n <= 0 { + t.Fatalf("stats.PerFile[%s] = %d, want > 0", paths[name], n) + } + } }) t.Run("empty commit map", func(t *testing.T) { @@ -185,7 +207,7 @@ func TestProcessFiles(t *testing.T) { want := `{"sha":"oldSHA1","nested":[{"sha":"oldSHA2"}]}` writeFile(t, filePath, want) - if err := ProcessFiles(dir, []string{"pull_requests"}, map[string]string{}); err != nil { + if _, err := ProcessFiles(dir, []string{"pull_requests"}, map[string]string{}); err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } @@ -193,7 +215,7 @@ func TestProcessFiles(t *testing.T) { }) t.Run("no matching files", func(t *testing.T) { - if err := ProcessFiles(t.TempDir(), DefaultPrefixes(), map[string]string{"oldSHA1": "newSHA1"}); err != nil { + if _, err := ProcessFiles(t.TempDir(), DefaultPrefixes(), map[string]string{"oldSHA1": "newSHA1"}); err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } }) @@ -205,12 +227,23 @@ func TestProcessFiles(t *testing.T) { writeFile(t, fooPath, `{"sha":"oldSHA1"}`) writeFile(t, pullPath, `{"sha":"oldSHA1"}`) - if err := ProcessFiles(dir, []string{"foo"}, map[string]string{"oldSHA1": "newSHA1"}); err != nil { + stats, err := ProcessFiles(dir, []string{"foo"}, map[string]string{"oldSHA1": "newSHA1"}) + if err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } assertJSONFileEqual(t, fooPath, `{"sha":"newSHA1"}`) assertJSONFileEqual(t, pullPath, `{"sha":"oldSHA1"}`) + + if got, want := len(stats.PerFile), 1; got != want { + t.Fatalf("len(stats.PerFile) = %d, want %d", got, want) + } + if n, ok := stats.PerFile[fooPath]; !ok || n <= 0 { + t.Fatalf("stats.PerFile[fooPath] = %d, ok=%v; want >0 entry", n, ok) + } + if _, ok := stats.PerFile[pullPath]; ok { + t.Fatalf("stats.PerFile must not contain pullPath") + } }) t.Run("single-pass behavior remaps all keys", func(t *testing.T) { @@ -224,7 +257,7 @@ func TestProcessFiles(t *testing.T) { "oldSHA3": "newSHA3", "oldSHA4": "newSHA4", } - if err := ProcessFiles(dir, []string{"pull_requests"}, commitMap); err != nil { + if _, err := ProcessFiles(dir, []string{"pull_requests"}, commitMap); err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } @@ -237,7 +270,7 @@ func TestProcessFiles(t *testing.T) { original := `{"title":"no SHA here","body":"https://example.invalid/oldSHA1","labels":["bug","help wanted"]}` writeFile(t, filePath, original) - if err := ProcessFiles(dir, []string{"issues"}, map[string]string{"oldSHA1": "newSHA1"}); err != nil { + if _, err := ProcessFiles(dir, []string{"issues"}, map[string]string{"oldSHA1": "newSHA1"}); err != nil { t.Fatalf("ProcessFiles returned error: %v", err) } @@ -260,6 +293,91 @@ func TestDefaultPrefixes(t *testing.T) { } } +func TestProcessFiles_SkipsWriteWhenNoReplacements(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "pull_requests_000001.json") + original := []byte(`{"sha":"someSHA","nested":[{"sha":"otherSHA"}]}`) + if err := os.WriteFile(filePath, original, 0644); err != nil { + t.Fatalf("write fixture: %v", err) + } + infoBefore, err := os.Stat(filePath) + if err != nil { + t.Fatalf("stat before: %v", err) + } + + stats, err := ProcessFiles(dir, []string{"pull_requests"}, map[string]string{"unrelated": "x"}) + if err != nil { + t.Fatalf("ProcessFiles returned error: %v", err) + } + + got, err := os.ReadFile(filePath) + if err != nil { + t.Fatalf("read after: %v", err) + } + if !reflect.DeepEqual(got, original) { + t.Fatalf("file bytes changed; got %q, want %q", got, original) + } + + infoAfter, err := os.Stat(filePath) + if err != nil { + t.Fatalf("stat after: %v", err) + } + if !infoAfter.ModTime().Equal(infoBefore.ModTime()) { + t.Fatalf("mtime changed: before=%v after=%v", infoBefore.ModTime(), infoAfter.ModTime()) + } + + if stats.FilesScanned != 1 { + t.Fatalf("FilesScanned = %d, want 1", stats.FilesScanned) + } + if stats.FilesChanged() != 0 { + t.Fatalf("FilesChanged() = %d, want 0", stats.FilesChanged()) + } + if len(stats.PerFile) != 0 { + t.Fatalf("len(PerFile) = %d, want 0", len(stats.PerFile)) + } +} + +func TestProcessFiles_ReturnsPartialStatsOnError(t *testing.T) { + dir := t.TempDir() + // filepath.Glob returns sorted results, so pull_requests_000001.json is processed before pull_requests_000002.json. + validPath := filepath.Join(dir, "pull_requests_000001.json") + badPath := filepath.Join(dir, "pull_requests_000002.json") + writeFile(t, validPath, `{"sha":"oldSHA1"}`) + writeFile(t, badPath, `{not valid json`) + + stats, err := ProcessFiles(dir, []string{"pull_requests"}, map[string]string{"oldSHA1": "newSHA1"}) + if err == nil { + t.Fatal("expected error from malformed JSON file") + } + if stats.FilesScanned != 2 { + t.Fatalf("FilesScanned = %d, want 2", stats.FilesScanned) + } + if len(stats.PerFile) < 1 { + t.Fatalf("len(PerFile) = %d, want >= 1", len(stats.PerFile)) + } + if _, ok := stats.PerFile[validPath]; !ok { + t.Fatalf("PerFile must contain validPath %q; got %#v", validPath, stats.PerFile) + } +} + +func TestStats_HelperMethods(t *testing.T) { + s := Stats{FilesScanned: 5, PerFile: map[string]int{"a": 2, "b": 3}} + if got, want := s.FilesChanged(), 2; got != want { + t.Fatalf("FilesChanged() = %d, want %d", got, want) + } + if got, want := s.TotalReplacements(), 5; got != want { + t.Fatalf("TotalReplacements() = %d, want %d", got, want) + } + + var zero Stats + if got := zero.FilesChanged(); got != 0 { + t.Fatalf("zero.FilesChanged() = %d, want 0", got) + } + if got := zero.TotalReplacements(); got != 0 { + t.Fatalf("zero.TotalReplacements() = %d, want 0", got) + } +} + func writeFile(t *testing.T, path string, content string) { t.Helper()