From 7b72e8e7f3d567555dadd21e496a4f3e6c2e96af Mon Sep 17 00:00:00 2001 From: Preston Martin Date: Fri, 6 Sep 2024 16:04:40 -0500 Subject: [PATCH 1/6] Performance improvements using maps and goroutines --- README.md | 3 +- cmd/root.go | 12 +- internal/commitremap/commitremap.go | 167 +++++++++++++---------- internal/commitremap/commitremap_test.go | 35 +++-- internal/commitremap/updatemodels.go | 93 +++++++++++++ 5 files changed, 221 insertions(+), 89 deletions(-) create mode 100644 internal/commitremap/updatemodels.go diff --git a/README.md b/README.md index 0fcfa7d..a2120b8 100644 --- a/README.md +++ b/README.md @@ -28,4 +28,5 @@ Flags: -h, --help help for gh-commit-remap -c, --mapping-file string Path to the commit map file Example: /path/to/commit-map -m, --migration-archive string Path to the migration archive Example: /path/to/migration-archive -``` + -t, --number-of-threads int [OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10" +``` \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index f94c337..066f0a9 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -18,6 +18,9 @@ func init() { rootCmd.Flags().StringP("migration-archive", "m", "", "Path to the migration archive Example: /path/to/migration-archive.tar.gz") rootCmd.MarkFlagRequired("migration-archive") + + // Optional flag to specify the number of threads to use for processing + rootCmd.Flags().StringP("number-of-threads", "t", "10", "[OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10") } // rootCmd represents the base command when called without any subcommands @@ -34,20 +37,19 @@ var rootCmd = &cobra.Command{ } // config to define the types of files to process - types := []string{"pull_requests", "issues", "issue_events"} + types := []string{"pull_requests", "pull_request_reviews", "pull_request_review_comments", "pull_request_review_threads", "commit_comments"} archivePath, _ := cmd.Flags().GetString("migration-archive") - - err = commitremap.ProcessFiles(archivePath, types, commitMap) + workers, _ := cmd.Flags().GetInt("number-of-threads") + err = commitremap.ProcessFiles(archivePath, types, commitMap, workers) if err != nil { log.Fatal(err) } - + log.Printf("Processed files successfully, re-taring archive") tarPath, err := archive.ReTar(archivePath) if err != nil { log.Fatal(err) } - log.Printf("New archive created: %s", tarPath) }, diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index 27e6ef0..d1fe584 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -1,99 +1,137 @@ package commitremap import ( + "bufio" + "bytes" "encoding/json" "fmt" "log" "os" "path/filepath" "strings" + "sync" ) -// Struct to represent a single entry in the commit map -type CommitMapEntry struct { - Old string - New string +type File struct { + FilePath string + Prefix string } // Parses the file and returns a map of old commit hashes to new commit hashes -func ParseCommitMap(filePath string) (*[]CommitMapEntry, error) { - commitMap := []CommitMapEntry{} - +func ParseCommitMap(filePath string) (*map[string]string, error) { + commitMap := make(map[string]string) // Read the commit-map file content, err := os.ReadFile(filePath) if err != nil { return nil, err } - // Split the file content into lines - lines := strings.Split(string(content), "\n") - - // Iterate over the lines and parse the old and new commit hashes - for _, line := range lines { - if strings.TrimSpace(line) == "" { + buf := bytes.NewBuffer(content) + scanner := bufio.NewScanner(buf) + for scanner.Scan() { + line := scanner.Text() + // Skip adding the header to the map + if line == "old new" { continue } - - fields := strings.Fields(line) + fields := strings.Split(line, " ") if len(fields) != 2 { return nil, fmt.Errorf("invalid line: %s", line) } - - commitMap = append(commitMap, CommitMapEntry{ - Old: fields[0], - New: fields[1], - }) + commitMap[fields[0]] = fields[1] } return &commitMap, nil } -func ProcessFiles(archiveLocation string, prefixes []string, commitMap *[]CommitMapEntry) error { - - for _, prefix := range prefixes { - // Get a list of all files that match the pattern - files, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) +func ProcessFiles(archiveLocation string, prefixes []string, + commitMap *map[string]string, workers int) error { + workerCount := 10 + fileChannel := make(chan File, workerCount) + fileProcessWg := sync.WaitGroup{} + filesToProcess := getAllFilesToProcess(prefixes, archiveLocation) + totalFiles := len(filesToProcess) + processedFiles := make(chan File, totalFiles) + processedFilesCount := 0 + // go routine to print out the progress of the processed files. It also + // writes the processed files to a log file + fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) + go func() { + f, err := os.OpenFile("processed_files.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - log.Fatalf("Error getting files: %v", err) + log.Fatalf("Error opening processed files log: %v", err) } - - // Process each file - for _, file := range files { - log.Println("Processing file:", file) - - err := updateMetadataFile(file, commitMap) - if err != nil { - return fmt.Errorf("Error updating metadata file: %v; %v", file, err) + defer f.Close() + for file := range processedFiles { + fmt.Printf("\033[1A\033[K") + fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) + if _, err := f.WriteString(fmt.Sprintf("%s\n", file.FilePath)); err != nil { + log.Fatalf("Error writing to processed files log: %v", err) } } + }() + + for i := 0; i < workerCount; i++ { + fileProcessWg.Add(1) + go func() { + defer fileProcessWg.Done() + for file := range fileChannel { + err := updateMetadataFile(file, *commitMap) + if err != nil { + log.Fatalf("Error updating metadata file: %v", err) + } + processedFiles <- file + processedFilesCount++ + } + }() + } + prefixWg := sync.WaitGroup{} + // Add the files to the channel + for _, file := range filesToProcess { + prefixWg.Add(1) + go func(file File) { + defer prefixWg.Done() + fileChannel <- file + }(file) } + prefixWg.Wait() + close(fileChannel) + fileProcessWg.Wait() + close(processedFiles) return nil } -func updateMetadataFile(filePath string, commitMap *[]CommitMapEntry) error { - // Read the JSON file - data, err := os.ReadFile(filePath) +func updateMetadataFile(file File, commitMap map[string]string) error { + var dataMap []interface{} + data, err := os.ReadFile(file.FilePath) if err != nil { - return fmt.Errorf("Error reading data: %v", err) + return err } - var dataMap interface{} err = json.Unmarshal(data, &dataMap) if err != nil { - return fmt.Errorf("Error unmarshaling data: %v", err) + return err } - - // Iterate over the commit map and replace the old commit hashes with the new ones - for _, commit := range *commitMap { - replaceSHA(dataMap, commit.Old, commit.New) + switch { + case file.Prefix == "pull_requests": + updatePullRequests(commitMap, &dataMap) + case file.Prefix == "pull_request_review_comments": + updatePullRequestReviewComments(commitMap, &dataMap) + case file.Prefix == "pull_request_reviews": + updatePullRequestReviews(commitMap, &dataMap) + case file.Prefix == "pull_request_review_threads": + updatePullRequestReviewThreads(commitMap, &dataMap) + case file.Prefix == "commit_comments": + updateCommitComments(commitMap, &dataMap) + default: + return fmt.Errorf("No supported rewrite found for file type: %s", file.Prefix) } - // Marshal the updated data to JSON and pretty print it + // Pretty print the data updatedData, err := json.MarshalIndent(dataMap, "", " ") if err != nil { return fmt.Errorf("Error marshaling updated data: %v", err) } - // Overwrite the original file with the updated data - err = os.WriteFile(filePath, updatedData, 0644) + err = os.WriteFile(file.FilePath, updatedData, 0644) if err != nil { return fmt.Errorf("Error writing updated data: %v", err) } @@ -101,29 +139,20 @@ func updateMetadataFile(filePath string, commitMap *[]CommitMapEntry) error { return nil } -func replaceSHA(data interface{}, oldSHA string, newSHA string) { - if data == nil { - return - } - - switch v := data.(type) { - case map[string]interface{}: - for key, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[key] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } +func getAllFilesToProcess(prefixes []string, archiveLocation string) []File { + var files []File + for _, prefix := range prefixes { + // Get a list of all filePaths that match the pattern + filePaths, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) + for _, filePath := range filePaths { + files = append(files, File{ + FilePath: filePath, + Prefix: prefix, + }) } - case []interface{}: - for i, value := range v { - if str, ok := value.(string); ok && str == oldSHA { - v[i] = newSHA - } else { - replaceSHA(value, oldSHA, newSHA) - } + if err != nil { + log.Fatalf("Error getting files: %v", err) } - default: - // Unsupported type, do nothing } + return files } diff --git a/internal/commitremap/commitremap_test.go b/internal/commitremap/commitremap_test.go index 550dc13..386b8fe 100644 --- a/internal/commitremap/commitremap_test.go +++ b/internal/commitremap/commitremap_test.go @@ -2,6 +2,7 @@ package commitremap import ( "os" + "reflect" "testing" ) @@ -9,7 +10,7 @@ func TestParseCommitMap(t *testing.T) { tests := []struct { name string fileContent string - expected *[]CommitMapEntry + expected *map[string]string expectError bool }{ { @@ -17,17 +18,17 @@ func TestParseCommitMap(t *testing.T) { fileContent: `oldSHA1 newSHA1 oldSHA2 newSHA2 oldSHA3 newSHA3`, - expected: &[]CommitMapEntry{ - {Old: "oldSHA1", New: "newSHA1"}, - {Old: "oldSHA2", New: "newSHA2"}, - {Old: "oldSHA3", New: "newSHA3"}, + expected: &map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + "oldSHA3": "newSHA3", }, expectError: false, }, { name: "Empty file", fileContent: ``, - expected: &[]CommitMapEntry{}, + expected: &map[string]string{}, expectError: false, }, { @@ -38,6 +39,17 @@ oldSHA2 newSHA2`, expected: nil, expectError: true, }, + { + name: "Skips first line (old .... new) and reads the rest", + fileContent: `old new +oldSHA1 newSHA1 +oldSHA2 newSHA2`, + expected: &map[string]string{ + "oldSHA1": "newSHA1", + "oldSHA2": "newSHA2", + }, + expectError: false, + }, } for _, tt := range tests { @@ -72,14 +84,9 @@ oldSHA2 newSHA2`, } } - // Check the result - if len(*result) != len(*tt.expected) { - t.Fatalf("Expected %d entries, got %d", len(*tt.expected), len(*result)) - } - for i, entry := range *result { - if entry.Old != (*tt.expected)[i].Old || entry.New != (*tt.expected)[i].New { - t.Errorf("Expected entry %d to be %+v, got %+v", i, (*tt.expected)[i], entry) - } + // Compare maps for equality and length + if !reflect.DeepEqual(*result, *tt.expected) { + t.Errorf("Expected %+v, got %+v", *tt.expected, *result) } }) } diff --git a/internal/commitremap/updatemodels.go b/internal/commitremap/updatemodels.go new file mode 100644 index 0000000..42195ea --- /dev/null +++ b/internal/commitremap/updatemodels.go @@ -0,0 +1,93 @@ +package commitremap + +func updatePullRequests(commitMap map[string]string, pullRequests *[]interface{}) error { + for _, pr := range *pullRequests { + if prMap, ok := pr.(map[string]interface{}); ok { + // head_sha, base_sha, merge_commit_sha + if headMap, ok := prMap["head"].(map[string]interface{}); ok { + if headSha, ok := headMap["sha"].(string); ok { + if newSha, ok := commitMap[headSha]; ok { + headMap["sha"] = newSha + } + } + } + if baseMap, ok := prMap["base"].(map[string]interface{}); ok { + if baseSha, ok := baseMap["sha"].(string); ok { + if newSha, ok := commitMap[baseSha]; ok { + baseMap["sha"] = newSha + } + } + } + if mergeCommitSha, ok := prMap["merge_commit_sha"].(string); ok { + if newSha, ok := commitMap[mergeCommitSha]; ok { + prMap["merge_commit_sha"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviews(commitMap map[string]string, pullRequestReview *[]interface{}) error { + for _, prr := range *pullRequestReview { + if prrMap, ok := prr.(map[string]interface{}); ok { + if headSha, ok := prrMap["head_sha"].(string); ok { + if newSha, ok := commitMap[headSha]; ok { + prrMap["head_sha"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviewComments(commitMap map[string]string, pullRequestReviewComments *[]interface{}) error { + for _, prrc := range *pullRequestReviewComments { + if prrcMap, ok := prrc.(map[string]interface{}); ok { + // commit_id, original_commit_id + if commitId, ok := prrcMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + prrcMap["commit_id"] = newSha + } + } + if originalCommitId, ok := prrcMap["original_commit_id"].(string); ok { + if newSha, ok := commitMap[originalCommitId]; ok { + prrcMap["original_commit_id"] = newSha + } + } + } + } + return nil +} + +func updatePullRequestReviewThreads(commitMap map[string]string, pullRequestReviewThreads *[]interface{}) error { + for _, prrt := range *pullRequestReviewThreads { + if prrtMap, ok := prrt.(map[string]interface{}); ok { + if commitId, ok := prrtMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + prrtMap["commit_id"] = newSha + } + } + if originalCommitId, ok := prrtMap["original_commit_id"].(string); ok { + if newSha, ok := commitMap[originalCommitId]; ok { + prrtMap["original_commit_id"] = newSha + } + } + } + } + return nil +} + +func updateCommitComments(commitMap map[string]string, commitComments *[]interface{}) error { + for _, cc := range *commitComments { + if ccMap, ok := cc.(map[string]interface{}); ok { + // commit_id + if commitId, ok := ccMap["commit_id"].(string); ok { + if newSha, ok := commitMap[commitId]; ok { + ccMap["commit_id"] = newSha + } + } + } + } + return nil +} From 66be41390d10ada06a437057950999971c53647c Mon Sep 17 00:00:00 2001 From: Preston Martin Date: Fri, 6 Sep 2024 17:05:35 -0500 Subject: [PATCH 2/6] Update internal/commitremap/commitremap.go Co-authored-by: Alejandro Menocal --- internal/commitremap/commitremap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index d1fe584..354ce17 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -44,7 +44,7 @@ func ParseCommitMap(filePath string) (*map[string]string, error) { func ProcessFiles(archiveLocation string, prefixes []string, commitMap *map[string]string, workers int) error { - workerCount := 10 + workerCount := workers fileChannel := make(chan File, workerCount) fileProcessWg := sync.WaitGroup{} filesToProcess := getAllFilesToProcess(prefixes, archiveLocation) From 9e68bcb44248179d883711b10311613553131636 Mon Sep 17 00:00:00 2001 From: Preston Martin Date: Fri, 6 Sep 2024 17:30:55 -0500 Subject: [PATCH 3/6] Comments, --- README.md | 3 ++- cmd/root.go | 8 ++++++- internal/commitremap/commitremap.go | 35 ++++++++++++++++++---------- internal/commitremap/updatemodels.go | 2 ++ 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a2120b8..cd36769 100644 --- a/README.md +++ b/README.md @@ -28,5 +28,6 @@ Flags: -h, --help help for gh-commit-remap -c, --mapping-file string Path to the commit map file Example: /path/to/commit-map -m, --migration-archive string Path to the migration archive Example: /path/to/migration-archive - -t, --number-of-threads int [OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10" + -t, --number-of-threads int [OPTIONAL] Number of threads(goroutines) to use for processing. + Defaults to 10, cannot exceed 50." ``` \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index 066f0a9..3cb7e2a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -20,7 +20,7 @@ func init() { rootCmd.MarkFlagRequired("migration-archive") // Optional flag to specify the number of threads to use for processing - rootCmd.Flags().StringP("number-of-threads", "t", "10", "[OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10") + rootCmd.Flags().IntP("number-of-threads", "t", 10, "[OPTIONAL] Number of threads(goroutines) to use for processing. Defaults to 10") } // rootCmd represents the base command when called without any subcommands @@ -41,6 +41,12 @@ var rootCmd = &cobra.Command{ archivePath, _ := cmd.Flags().GetString("migration-archive") workers, _ := cmd.Flags().GetInt("number-of-threads") + if workers < 1 { + workers = 10 + } + if workers > 50 { + log.Fatalf("Number of threads cannot exceed 50") + } err = commitremap.ProcessFiles(archivePath, types, commitMap, workers) if err != nil { log.Fatal(err) diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index 354ce17..34ebf9c 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -17,10 +17,11 @@ type File struct { Prefix string } -// Parses the file and returns a map of old commit hashes to new commit hashes +// Parses the commit-map file and returns a map of old commit hashes to +// new commit hashes using the old commit sha as the key + func ParseCommitMap(filePath string) (*map[string]string, error) { commitMap := make(map[string]string) - // Read the commit-map file content, err := os.ReadFile(filePath) if err != nil { return nil, err @@ -42,6 +43,7 @@ func ParseCommitMap(filePath string) (*map[string]string, error) { return &commitMap, nil } +// Processes the files in the archive and updates the commit shas func ProcessFiles(archiveLocation string, prefixes []string, commitMap *map[string]string, workers int) error { workerCount := workers @@ -57,18 +59,23 @@ func ProcessFiles(archiveLocation string, prefixes []string, go func() { f, err := os.OpenFile("processed_files.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - log.Fatalf("Error opening processed files log: %v", err) + log.Fatalf("error opening processed files log: %v", err) } defer f.Close() for file := range processedFiles { + // Clear the previous line + // \033 is the ASCII escape character + // [1A moves the cursor up one line + // [K erases the line + // https://en.wikipedia.org/wiki/ANSI_escape_code fmt.Printf("\033[1A\033[K") fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) if _, err := f.WriteString(fmt.Sprintf("%s\n", file.FilePath)); err != nil { - log.Fatalf("Error writing to processed files log: %v", err) + log.Fatalf("error writing to processed files log: %v", err) } } }() - + // Starts a pool of workers to process the files for i := 0; i < workerCount; i++ { fileProcessWg.Add(1) go func() { @@ -76,7 +83,7 @@ func ProcessFiles(archiveLocation string, prefixes []string, for file := range fileChannel { err := updateMetadataFile(file, *commitMap) if err != nil { - log.Fatalf("Error updating metadata file: %v", err) + log.Fatalf("error updating metadata file: %v", err) } processedFiles <- file processedFilesCount++ @@ -84,7 +91,7 @@ func ProcessFiles(archiveLocation string, prefixes []string, }() } prefixWg := sync.WaitGroup{} - // Add the files to the channel + // Seperate go routines to add the files to the channel for _, file := range filesToProcess { prefixWg.Add(1) go func(file File) { @@ -99,6 +106,7 @@ func ProcessFiles(archiveLocation string, prefixes []string, return nil } +// Updates each metadata file with the new commit shas func updateMetadataFile(file File, commitMap map[string]string) error { var dataMap []interface{} data, err := os.ReadFile(file.FilePath) @@ -110,6 +118,9 @@ func updateMetadataFile(file File, commitMap map[string]string) error { if err != nil { return err } + // Processes each of the different file types contained in the archive. + // The file types listed below are currently the only types that contain + // commit shas as a distinct field. switch { case file.Prefix == "pull_requests": updatePullRequests(commitMap, &dataMap) @@ -122,27 +133,27 @@ func updateMetadataFile(file File, commitMap map[string]string) error { case file.Prefix == "commit_comments": updateCommitComments(commitMap, &dataMap) default: - return fmt.Errorf("No supported rewrite found for file type: %s", file.Prefix) + return fmt.Errorf("no supported rewrite found for file type: %s", file.Prefix) } // Pretty print the data updatedData, err := json.MarshalIndent(dataMap, "", " ") if err != nil { - return fmt.Errorf("Error marshaling updated data: %v", err) + return fmt.Errorf("error marshaling updated data: %v", err) } err = os.WriteFile(file.FilePath, updatedData, 0644) if err != nil { - return fmt.Errorf("Error writing updated data: %v", err) + return fmt.Errorf("error writing updated data: %v", err) } return nil } +// Fetches all of the files to update based on the file prefixes func getAllFilesToProcess(prefixes []string, archiveLocation string) []File { var files []File for _, prefix := range prefixes { - // Get a list of all filePaths that match the pattern filePaths, err := filepath.Glob(filepath.Join(archiveLocation, prefix+"_*.json")) for _, filePath := range filePaths { files = append(files, File{ @@ -151,7 +162,7 @@ func getAllFilesToProcess(prefixes []string, archiveLocation string) []File { }) } if err != nil { - log.Fatalf("Error getting files: %v", err) + log.Fatalf("error getting files: %v", err) } } return files diff --git a/internal/commitremap/updatemodels.go b/internal/commitremap/updatemodels.go index 42195ea..32053af 100644 --- a/internal/commitremap/updatemodels.go +++ b/internal/commitremap/updatemodels.go @@ -31,6 +31,7 @@ func updatePullRequests(commitMap map[string]string, pullRequests *[]interface{} func updatePullRequestReviews(commitMap map[string]string, pullRequestReview *[]interface{}) error { for _, prr := range *pullRequestReview { if prrMap, ok := prr.(map[string]interface{}); ok { + // head_sha if headSha, ok := prrMap["head_sha"].(string); ok { if newSha, ok := commitMap[headSha]; ok { prrMap["head_sha"] = newSha @@ -63,6 +64,7 @@ func updatePullRequestReviewComments(commitMap map[string]string, pullRequestRev func updatePullRequestReviewThreads(commitMap map[string]string, pullRequestReviewThreads *[]interface{}) error { for _, prrt := range *pullRequestReviewThreads { if prrtMap, ok := prrt.(map[string]interface{}); ok { + // commit_id, original_commit_id if commitId, ok := prrtMap["commit_id"].(string); ok { if newSha, ok := commitMap[commitId]; ok { prrtMap["commit_id"] = newSha From 3dc938a769168c001c6d6bed5e608ff98de3b091 Mon Sep 17 00:00:00 2001 From: Preston Martin Date: Mon, 9 Sep 2024 11:32:05 -0500 Subject: [PATCH 4/6] Code clarity --- .gitignore | 2 ++ internal/commitremap/commitremap.go | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 224994c..3fed684 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ go.work go.work.sum gh-commit-remap + +.DS_Store \ No newline at end of file diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index 34ebf9c..066b1d9 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -10,8 +10,11 @@ import ( "path/filepath" "strings" "sync" + "sync/atomic" ) +const COMMIT_MAP_HEADER string = "old new" + type File struct { FilePath string Prefix string @@ -27,18 +30,23 @@ func ParseCommitMap(filePath string) (*map[string]string, error) { return nil, err } buf := bytes.NewBuffer(content) + if buf.Len() == 0 { + return &commitMap, nil + } scanner := bufio.NewScanner(buf) for scanner.Scan() { line := scanner.Text() // Skip adding the header to the map - if line == "old new" { + if line == COMMIT_MAP_HEADER { continue } fields := strings.Split(line, " ") + oldSha, newSha := fields[0], fields[1] + if len(fields) != 2 { return nil, fmt.Errorf("invalid line: %s", line) } - commitMap[fields[0]] = fields[1] + commitMap[oldSha] = newSha } return &commitMap, nil } @@ -52,7 +60,8 @@ func ProcessFiles(archiveLocation string, prefixes []string, filesToProcess := getAllFilesToProcess(prefixes, archiveLocation) totalFiles := len(filesToProcess) processedFiles := make(chan File, totalFiles) - processedFilesCount := 0 + var processedFilesCount atomic.Int64 + // go routine to print out the progress of the processed files. It also // writes the processed files to a log file fmt.Printf("Processed %d/%d files\n", processedFilesCount, totalFiles) @@ -86,7 +95,7 @@ func ProcessFiles(archiveLocation string, prefixes []string, log.Fatalf("error updating metadata file: %v", err) } processedFiles <- file - processedFilesCount++ + processedFilesCount.Add(1) } }() } From 390afa502eb6db1c7cd57b1187d94c686435ecaa Mon Sep 17 00:00:00 2001 From: Preston Martin Date: Tue, 10 Sep 2024 19:21:02 -0500 Subject: [PATCH 5/6] Code clarity --- internal/commitremap/commitremap.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index 066b1d9..d3913f5 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -48,6 +48,9 @@ func ParseCommitMap(filePath string) (*map[string]string, error) { } commitMap[oldSha] = newSha } + if err := scanner.Err(); err != nil { + return nil, err + } return &commitMap, nil } From 89704c0adb40715a91964498b61dc53f815f56bc Mon Sep 17 00:00:00 2001 From: Suleiman Suleiman Date: Tue, 29 Oct 2024 12:54:53 -0500 Subject: [PATCH 6/6] Moving field len check for invalid input --- internal/commitremap/commitremap.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/commitremap/commitremap.go b/internal/commitremap/commitremap.go index d3913f5..2f37af1 100644 --- a/internal/commitremap/commitremap.go +++ b/internal/commitremap/commitremap.go @@ -41,11 +41,10 @@ func ParseCommitMap(filePath string) (*map[string]string, error) { continue } fields := strings.Split(line, " ") - oldSha, newSha := fields[0], fields[1] - if len(fields) != 2 { return nil, fmt.Errorf("invalid line: %s", line) } + oldSha, newSha := fields[0], fields[1] commitMap[oldSha] = newSha } if err := scanner.Err(); err != nil {