Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ go.work
go.work.sum

gh-commit-remap

.DS_Store
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ Flags:
-h, --help help for gh-commit-remap
-c, --mapping-file string Path to the commit map file Example: /path/to/commit-map
-m, --migration-archive string Path to the migration archive Example: /path/to/migration-archive
```
-t, --number-of-threads int [OPTIONAL] Number of threads(goroutines) to use for processing.
Defaults to 10, cannot exceed 50."
```
18 changes: 13 additions & 5 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ func init() {

rootCmd.Flags().StringP("migration-archive", "m", "", "Path to the migration archive Example: /path/to/migration-archive.tar.gz")
rootCmd.MarkFlagRequired("migration-archive")

// Optional flag to specify the number of threads to use for processing
rootCmd.Flags().IntP("number-of-threads", "t", 10, "[OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10")
}

// rootCmd represents the base command when called without any subcommands
Expand All @@ -34,20 +37,25 @@ var rootCmd = &cobra.Command{
}

// config to define the types of files to process
types := []string{"pull_requests", "issues", "issue_events"}
types := []string{"pull_requests", "pull_request_reviews", "pull_request_review_comments", "pull_request_review_threads", "commit_comments"}

archivePath, _ := cmd.Flags().GetString("migration-archive")

err = commitremap.ProcessFiles(archivePath, types, commitMap)
workers, _ := cmd.Flags().GetInt("number-of-threads")
Comment thread
pmartindev marked this conversation as resolved.
if workers < 1 {
workers = 10
}
if workers > 50 {
log.Fatalf("Number of threads cannot exceed 50")
}
err = commitremap.ProcessFiles(archivePath, types, commitMap, workers)
if err != nil {
log.Fatal(err)
}

log.Printf("Processed files successfully, re-taring archive")
tarPath, err := archive.ReTar(archivePath)
if err != nil {
log.Fatal(err)
}

log.Printf("New archive created: %s", tarPath)

},
Expand Down
195 changes: 123 additions & 72 deletions internal/commitremap/commitremap.go
Original file line number Diff line number Diff line change
@@ -1,129 +1,180 @@
package commitremap

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
)

// Struct to represent a single entry in the commit map
type CommitMapEntry struct {
Old string
New string
const COMMIT_MAP_HEADER string = "old new"

type File struct {
FilePath string
Prefix string
}

// Parses the file and returns a map of old commit hashes to new commit hashes
func ParseCommitMap(filePath string) (*[]CommitMapEntry, error) {
commitMap := []CommitMapEntry{}
// Parses the commit-map file and returns a map of old commit hashes to
// new commit hashes using the old commit sha as the key

// Read the commit-map file
func ParseCommitMap(filePath string) (*map[string]string, error) {
commitMap := make(map[string]string)
content, err := os.ReadFile(filePath)
if err != nil {
return nil, err
}
// Split the file content into lines
lines := strings.Split(string(content), "\n")

// Iterate over the lines and parse the old and new commit hashes
for _, line := range lines {
if strings.TrimSpace(line) == "" {
buf := bytes.NewBuffer(content)
if buf.Len() == 0 {
return &commitMap, nil
}
scanner := bufio.NewScanner(buf)
for scanner.Scan() {
Comment thread
pmartindev marked this conversation as resolved.
line := scanner.Text()
// Skip adding the header to the map
if line == COMMIT_MAP_HEADER {
continue
}

fields := strings.Fields(line)
fields := strings.Split(line, " ")
if len(fields) != 2 {
return nil, fmt.Errorf("invalid line: %s", line)
}

commitMap = append(commitMap, CommitMapEntry{
Old: fields[0],
New: fields[1],
})
oldSha, newSha := fields[0], fields[1]
commitMap[oldSha] = newSha
}
if err := scanner.Err(); err != nil {
return nil, err
}
return &commitMap, nil
}

func ProcessFiles(archiveLocation string, prefixes []string, commitMap *[]CommitMapEntry) error {

for _, prefix := range prefixes {
// Get a list of all files that match the pattern
files, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json"))
// Processes the files in the archive and updates the commit shas
func ProcessFiles(archiveLocation string, prefixes []string,
commitMap *map[string]string, workers int) error {
workerCount := workers
fileChannel := make(chan File, workerCount)
fileProcessWg := sync.WaitGroup{}
filesToProcess := getAllFilesToProcess(prefixes, archiveLocation)
totalFiles := len(filesToProcess)
processedFiles := make(chan File, totalFiles)
var processedFilesCount atomic.Int64

// go routine to print out the progress of the processed files. It also
// writes the processed files to a log file
fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles)
go func() {
f, err := os.OpenFile("processed_files.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatalf("Error getting files: %v", err)
log.Fatalf("error opening processed files log: %v", err)
}

// Process each file
for _, file := range files {
log.Println("Processing file:", file)

err := updateMetadataFile(file, commitMap)
if err != nil {
return fmt.Errorf("Error updating metadata file: %v; %v", file, err)
defer f.Close()
for file := range processedFiles {
// Clear the previous line
// \033 is the ASCII escape character
// [1A moves the cursor up one line
// [K erases the line
// https://en.wikipedia.org/wiki/ANSI_escape_code
fmt.Printf("\033[1A\033[K")
Comment thread
pmartindev marked this conversation as resolved.
fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles)
if _, err := f.WriteString(fmt.Sprintf("%s\n", file.FilePath)); err != nil {
log.Fatalf("error writing to processed files log: %v", err)
}
}
}()
// Starts a pool of workers to process the files
for i := 0; i < workerCount; i++ {
fileProcessWg.Add(1)
go func() {
defer fileProcessWg.Done()
for file := range fileChannel {
err := updateMetadataFile(file, *commitMap)
if err != nil {
log.Fatalf("error updating metadata file: %v", err)
}
processedFiles <- file
processedFilesCount.Add(1)
}
}()
}
prefixWg := sync.WaitGroup{}
// Seperate go routines to add the files to the channel
for _, file := range filesToProcess {
prefixWg.Add(1)
go func(file File) {
defer prefixWg.Done()
fileChannel <- file
}(file)
}
prefixWg.Wait()
close(fileChannel)
fileProcessWg.Wait()
close(processedFiles)
return nil
}

func updateMetadataFile(filePath string, commitMap *[]CommitMapEntry) error {
// Read the JSON file
data, err := os.ReadFile(filePath)
// Updates each metadata file with the new commit shas
func updateMetadataFile(file File, commitMap map[string]string) error {
var dataMap []interface{}
data, err := os.ReadFile(file.FilePath)
if err != nil {
return fmt.Errorf("Error reading data: %v", err)
return err
}

var dataMap interface{}
err = json.Unmarshal(data, &dataMap)
if err != nil {
return fmt.Errorf("Error unmarshaling data: %v", err)
return err
}

// Iterate over the commit map and replace the old commit hashes with the new ones
for _, commit := range *commitMap {
replaceSHA(dataMap, commit.Old, commit.New)
// Processes each of the different file types contained in the archive.
// The file types listed below are currently the only types that contain
// commit shas as a distinct field.
switch {
case file.Prefix == "pull_requests":
updatePullRequests(commitMap, &dataMap)
case file.Prefix == "pull_request_review_comments":
updatePullRequestReviewComments(commitMap, &dataMap)
case file.Prefix == "pull_request_reviews":
updatePullRequestReviews(commitMap, &dataMap)
case file.Prefix == "pull_request_review_threads":
updatePullRequestReviewThreads(commitMap, &dataMap)
case file.Prefix == "commit_comments":
updateCommitComments(commitMap, &dataMap)
default:
return fmt.Errorf("no supported rewrite found for file type: %s", file.Prefix)
}

// Marshal the updated data to JSON and pretty print it
// Pretty print the data
updatedData, err := json.MarshalIndent(dataMap, "", " ")
if err != nil {
return fmt.Errorf("Error marshaling updated data: %v", err)
return fmt.Errorf("error marshaling updated data: %v", err)
}

// Overwrite the original file with the updated data
err = os.WriteFile(filePath, updatedData, 0644)
err = os.WriteFile(file.FilePath, updatedData, 0644)
if err != nil {
return fmt.Errorf("Error writing updated data: %v", err)
return fmt.Errorf("error writing updated data: %v", err)
}

return nil
}

func replaceSHA(data interface{}, oldSHA string, newSHA string) {
if data == nil {
return
}

switch v := data.(type) {
case map[string]interface{}:
for key, value := range v {
if str, ok := value.(string); ok && str == oldSHA {
v[key] = newSHA
} else {
replaceSHA(value, oldSHA, newSHA)
}
// Fetches all of the files to update based on the file prefixes
func getAllFilesToProcess(prefixes []string, archiveLocation string) []File {
var files []File
for _, prefix := range prefixes {
filePaths, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json"))
for _, filePath := range filePaths {
files = append(files, File{
FilePath: filePath,
Prefix: prefix,
})
}
case []interface{}:
for i, value := range v {
if str, ok := value.(string); ok && str == oldSHA {
v[i] = newSHA
} else {
replaceSHA(value, oldSHA, newSHA)
}
if err != nil {
log.Fatalf("error getting files: %v", err)
}
default:
// Unsupported type, do nothing
}
return files
}
35 changes: 21 additions & 14 deletions internal/commitremap/commitremap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,33 @@ package commitremap

import (
"os"
"reflect"
"testing"
)

func TestParseCommitMap(t *testing.T) {
tests := []struct {
name string
fileContent string
expected *[]CommitMapEntry
expected *map[string]string
expectError bool
}{
{
name: "Valid commit map",
fileContent: `oldSHA1 newSHA1
oldSHA2 newSHA2
oldSHA3 newSHA3`,
expected: &[]CommitMapEntry{
{Old: "oldSHA1", New: "newSHA1"},
{Old: "oldSHA2", New: "newSHA2"},
{Old: "oldSHA3", New: "newSHA3"},
expected: &map[string]string{
"oldSHA1": "newSHA1",
"oldSHA2": "newSHA2",
"oldSHA3": "newSHA3",
},
expectError: false,
},
{
name: "Empty file",
fileContent: ``,
expected: &[]CommitMapEntry{},
expected: &map[string]string{},
expectError: false,
},
{
Expand All @@ -38,6 +39,17 @@ oldSHA2 newSHA2`,
expected: nil,
expectError: true,
},
{
name: "Skips first line (old .... new) and reads the rest",
fileContent: `old new
oldSHA1 newSHA1
oldSHA2 newSHA2`,
expected: &map[string]string{
"oldSHA1": "newSHA1",
"oldSHA2": "newSHA2",
},
expectError: false,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -72,14 +84,9 @@ oldSHA2 newSHA2`,
}
}

// Check the result
if len(*result) != len(*tt.expected) {
t.Fatalf("Expected %d entries, got %d", len(*tt.expected), len(*result))
}
for i, entry := range *result {
if entry.Old != (*tt.expected)[i].Old || entry.New != (*tt.expected)[i].New {
t.Errorf("Expected entry %d to be %+v, got %+v", i, (*tt.expected)[i], entry)
}
// Compare maps for equality and length
if !reflect.DeepEqual(*result, *tt.expected) {
t.Errorf("Expected %+v, got %+v", *tt.expected, *result)
}
})
}
Expand Down
Loading