diff --git a/.gitignore b/.gitignore index 224994c..3fed684 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ go.work go.work.sum gh-commit-remap + +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 0fcfa7d..cd36769 100644 --- a/README.md +++ b/README.md @@ -28,4 +28,6 @@ Flags: -h, --help help for gh-commit-remap -c, --mapping-file string Path to the commit map file Example: /path/to/commit-map -m, --migration-archive string Path to the migration archive Example: /path/to/migration-archive -``` + -t, --number-of-threads int [OPTIONAL] Number of threads(goroutines) to use for processing. + Defaults to 10, cannot exceed 50." +``` \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index f94c337..3cb7e2a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -18,6 +18,9 @@ func init() { rootCmd.Flags().StringP("migration-archive", "m", "", "Path to the migration archive Example: /path/to/migration-archive.tar.gz") rootCmd.MarkFlagRequired("migration-archive") + + // Optional flag to specify the number of threads to use for processing + rootCmd.Flags().IntP("number-of-threads", "t", 10, "[OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10") } // rootCmd represents the base command when called without any subcommands @@ -34,20 +37,25 @@ var rootCmd = &cobra.Command{ } // config to define the types of files to process - types := []string{"pull_requests", "issues", "issue_events"} + types := []string{"pull_requests", "pull_request_reviews", "pull_request_review_comments", "pull_request_review_threads", "commit_comments"} archivePath, _ := cmd.Flags().GetString("migration-archive") - - err = commitremap.ProcessFiles(archivePath, types, commitMap) + workers, _ := cmd.Flags().GetInt("number-of-threads") + if workers < 1 { + workers = 10 + } + if workers > 50 { + log.Fatalf("Number of threads cannot exceed 50") + } + err = commitremap.ProcessFiles(archivePath, types, commitMap, workers) if err != nil { log.Fatal(err) } - + log.Printf("Processed files successfully, re-taring archive") tarPath, err := archive.ReTar(archivePath) if err != nil { log.Fatal(err) } - log.Printf("New archive created: %s", tarPath) }, diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index 27e6ef0..2f37af1 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -1,129 +1,180 @@ package commitremap import ( + "bufio" + "bytes" "encoding/json" "fmt" "log" "os" "path/filepath" "strings" + "sync" + "sync/atomic" ) -// Struct to represent a single entry in the commit map -type CommitMapEntry struct { - Old string - New string +const COMMIT_MAP_HEADER string = "old new" + +type File struct { + FilePath string + Prefix string } -// Parses the file and returns a map of old commit hashes to new commit hashes -func ParseCommitMap(filePath string) (*[]CommitMapEntry, error) { - commitMap := []CommitMapEntry{} +// Parses the commit-map file and returns a map of old commit hashes to +// new commit hashes using the old commit sha as the key - // Read the commit-map file +func ParseCommitMap(filePath string) (*map[string]string, error) { + commitMap := make(map[string]string) content, err := os.ReadFile(filePath) if err != nil { return nil, err } - // Split the file content into lines - lines := strings.Split(string(content), "\n") - - // Iterate over the lines and parse the old and new commit hashes - for _, line := range lines { - if strings.TrimSpace(line) == "" { + buf := bytes.NewBuffer(content) + if buf.Len() == 0 { + return &commitMap, nil + } + scanner := bufio.NewScanner(buf) + for scanner.Scan() { + line := scanner.Text() + // Skip adding the header to the map + if line == COMMIT_MAP_HEADER { continue } - - fields := strings.Fields(line) + fields := strings.Split(line, " ") if len(fields) != 2 { return nil, fmt.Errorf("invalid line: %s", line) } - - commitMap = append(commitMap, CommitMapEntry{ - Old: fields[0], - New: fields[1], - }) + oldSha, newSha := fields[0], fields[1] + commitMap[oldSha] = newSha + } + if err := scanner.Err(); err != nil { + return nil, err } return &commitMap, nil } -func ProcessFiles(archiveLocation string, prefixes []string, commitMap *[]CommitMapEntry) error { - - for _, prefix := range prefixes { - // Get a list of all files that match the pattern - files, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) +// Processes the files in the archive and updates the commit shas +func ProcessFiles(archiveLocation string, prefixes []string, + commitMap *map[string]string, workers int) error { + workerCount := workers + fileChannel := make(chan File, workerCount) + fileProcessWg := sync.WaitGroup{} + filesToProcess := getAllFilesToProcess(prefixes, archiveLocation) + totalFiles := len(filesToProcess) + processedFiles := make(chan File, totalFiles) + var processedFilesCount atomic.Int64 + + // go routine to print out the progress of the processed files. It also + // writes the processed files to a log file + fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) + go func() { + f, err := os.OpenFile("processed_files.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - log.Fatalf("Error getting files: %v", err) + log.Fatalf("error opening processed files log: %v", err) } - - // Process each file - for _, file := range files { - log.Println("Processing file:", file) - - err := updateMetadataFile(file, commitMap) - if err != nil { - return fmt.Errorf("Error updating metadata file: %v; %v", file, err) + defer f.Close() + for file := range processedFiles { + // Clear the previous line + // \033 is the ASCII escape character + // [1A moves the cursor up one line + // [K erases the line + // https://en.wikipedia.org/wiki/ANSI_escape_code + fmt.Printf("\033[1A\033[K") + fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) + if _, err := f.WriteString(fmt.Sprintf("%s\n", file.FilePath)); err != nil { + log.Fatalf("error writing to processed files log: %v", err) } } + }() + // Starts a pool of workers to process the files + for i := 0; i < workerCount; i++ { + fileProcessWg.Add(1) + go func() { + defer fileProcessWg.Done() + for file := range fileChannel { + err := updateMetadataFile(file, *commitMap) + if err != nil { + log.Fatalf("error updating metadata file: %v", err) + } + processedFiles <- file + processedFilesCount.Add(1) + } + }() + } + prefixWg := sync.WaitGroup{} + // Seperate go routines to add the files to the channel + for _, file := range filesToProcess { + prefixWg.Add(1) + go func(file File) { + defer prefixWg.Done() + fileChannel <- file + }(file) } + prefixWg.Wait() + close(fileChannel) + fileProcessWg.Wait() + close(processedFiles) return nil } -func updateMetadataFile(filePath string, commitMap *[]CommitMapEntry) error { - // Read the JSON file - data, err := os.ReadFile(filePath) +// Updates each metadata file with the new commit shas +func updateMetadataFile(file File, commitMap map[string]string) error { + var dataMap []interface{} + data, err := os.ReadFile(file.FilePath) if err != nil { - return fmt.Errorf("Error reading data: %v", err) + return err } - var dataMap interface{} err = json.Unmarshal(data, &dataMap) if err != nil { - return fmt.Errorf("Error unmarshaling data: %v", err) + return err } - - // Iterate over the commit map and replace the old commit hashes with the new ones - for _, commit := range *commitMap { - replaceSHA(dataMap, commit.Old, commit.New) + // Processes each of the different file types contained in the archive. + // The file types listed below are currently the only types that contain + // commit shas as a distinct field. + switch { + case file.Prefix == "pull_requests": + updatePullRequests(commitMap, &dataMap) + case file.Prefix == "pull_request_review_comments": + updatePullRequestReviewComments(commitMap, &dataMap) + case file.Prefix == "pull_request_reviews": + updatePullRequestReviews(commitMap, &dataMap) + case file.Prefix == "pull_request_review_threads": + updatePullRequestReviewThreads(commitMap, &dataMap) + case file.Prefix == "commit_comments": + updateCommitComments(commitMap, &dataMap) + default: + return fmt.Errorf("no supported rewrite found for file type: %s", file.Prefix) } - // Marshal the updated data to JSON and pretty print it + // Pretty print the data updatedData, err := json.MarshalIndent(dataMap, "", " ") if err != nil { - return fmt.Errorf("Error marshaling updated data: %v", err) + return fmt.Errorf("error marshaling updated data: %v", err) } - // Overwrite the original file with the updated data - err = os.WriteFile(filePath, updatedData, 0644) + err = os.WriteFile(file.FilePath, updatedData, 0644) if err != nil { - return fmt.Errorf("Error writing updated data: %v", err) + return fmt.Errorf("error writing updated data: %v", err) } return nil } -func replaceSHA(data interface{}, oldSHA string, newSHA string) { - if data == nil { - return - } - - switch v := data.(type) { - case map[string]interface{}: - for key, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[key] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } +// Fetches all of the files to update based on the file prefixes +func getAllFilesToProcess(prefixes []string, archiveLocation string) []File { + var files []File + for _, prefix := range prefixes { + filePaths, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) + for _, filePath := range filePaths { + files = append(files, File{ + FilePath: filePath, + Prefix: prefix, + }) } - case []interface{}: - for i, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[i] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } + if err != nil { + log.Fatalf("error getting files: %v", err) } - default: - // Unsupported type, do nothing } + return files } diff --git a/internal/commitremap/commitremap_test.go b/internal/commitremap/commitremap_test.go index 550dc13..386b8fe 100644 --- a/internal/commitremap/commitremap_test.go +++ b/internal/commitremap/commitremap_test.go @@ -2,6 +2,7 @@ package commitremap import ( "os" + "reflect" "testing" ) @@ -9,7 +10,7 @@ func TestParseCommitMap(t *testing.T) { tests := []struct { name string fileContent string - expected *[]CommitMapEntry + expected *map[string]string expectError bool }{ { @@ -17,17 +18,17 @@ func TestParseCommitMap(t *testing.T) { fileContent: `oldSHA1 newSHA1 oldSHA2 newSHA2 oldSHA3 newSHA3`, - expected: &[]CommitMapEntry{ - {Old: "oldSHA1", New: "newSHA1"}, - {Old: "oldSHA2", New: "newSHA2"}, - {Old: "oldSHA3", New: "newSHA3"}, + expected: &map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", }, expectError: false, }, { name: "Empty file", fileContent: ``, - expected: &[]CommitMapEntry{}, + expected: &map[string]string{}, expectError: false, }, { @@ -38,6 +39,17 @@ oldSHA2 newSHA2`, expected: nil, expectError: true, }, + { + name: "Skips first line (old .... new) and reads the rest", + fileContent: `old new +oldSHA1 newSHA1 +oldSHA2 newSHA2`, + expected: &map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + }, + expectError: false, + }, } for _, tt := range tests { @@ -72,14 +84,9 @@ oldSHA2 newSHA2`, } } - // Check the result - if len(*result) != len(*tt.expected) { - t.Fatalf("Expected %d entries, got %d", len(*tt.expected), len(*result)) - } - for i, entry := range *result { - if entry.Old != (*tt.expected)[i].Old || entry.New != (*tt.expected)[i].New { - t.Errorf("Expected entry %d to be %+v, got %+v", i, (*tt.expected)[i], entry) - } + // Compare maps for equality and length + if !reflect.DeepEqual(*result, *tt.expected) { + t.Errorf("Expected %+v, got %+v", *tt.expected, *result) } }) } diff --git a/internal/commitremap/updatemodels.go b/internal/commitremap/updatemodels.go new file mode 100644 index 0000000..32053af --- /dev/null +++ b/internal/commitremap/updatemodels.go @@ -0,0 +1,95 @@ +package commitremap + +func updatePullRequests(commitMap map[string]string, pullRequests *[]interface{}) error { + for _, pr := range *pullRequests { + if prMap, ok := pr.(map[string]interface{}); ok { + // head_sha, base_sha, merge_commit_sha + if headMap, ok := prMap["head"].(map[string]interface{}); ok { + if headSha, ok := headMap["sha"].(string); ok { + if newSha, ok := commitMap[headSha]; ok { + headMap["sha"] = newSha + } + } + } + if baseMap, ok := prMap["base"].(map[string]interface{}); ok { + if baseSha, ok := baseMap["sha"].(string); ok { + if newSha, ok := commitMap[baseSha]; ok { + baseMap["sha"] = newSha + } + } + } + if mergeCommitSha, ok := prMap["merge_commit_sha"].(string); ok { + if newSha, ok := commitMap[mergeCommitSha]; ok { + prMap["merge_commit_sha"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviews(commitMap map[string]string, pullRequestReview *[]interface{}) error { + for _, prr := range *pullRequestReview { + if prrMap, ok := prr.(map[string]interface{}); ok { + // head_sha + if headSha, ok := prrMap["head_sha"].(string); ok { + if newSha, ok := commitMap[headSha]; ok { + prrMap["head_sha"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviewComments(commitMap map[string]string, pullRequestReviewComments *[]interface{}) error { + for _, prrc := range *pullRequestReviewComments { + if prrcMap, ok := prrc.(map[string]interface{}); ok { + // commit_id, original_commit_id + if commitId, ok := prrcMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + prrcMap["commit_id"] = newSha + } + } + if originalCommitId, ok := prrcMap["original_commit_id"].(string); ok { + if newSha, ok := commitMap[originalCommitId]; ok { + prrcMap["original_commit_id"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviewThreads(commitMap map[string]string, pullRequestReviewThreads *[]interface{}) error { + for _, prrt := range *pullRequestReviewThreads { + if prrtMap, ok := prrt.(map[string]interface{}); ok { + // commit_id, original_commit_id + if commitId, ok := prrtMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + prrtMap["commit_id"] = newSha + } + } + if originalCommitId, ok := prrtMap["original_commit_id"].(string); ok { + if newSha, ok := commitMap[originalCommitId]; ok { + prrtMap["original_commit_id"] = newSha + } + } + } + } + return nil +} + +func updateCommitComments(commitMap map[string]string, commitComments *[]interface{}) error { + for _, cc := range *commitComments { + if ccMap, ok := cc.(map[string]interface{}); ok { + // commit_id + if commitId, ok := ccMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + ccMap["commit_id"] = newSha + } + } + } + } + return nil +}