From 74ca801b3972566277d1cd64567ff155a9532ffe Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Sun, 9 Aug 2020 16:56:15 +0200 Subject: [PATCH 1/9] Renamed files --- filehandler.go => fitfilehandler.go | 0 locationhandler.go | 1 + 2 files changed, 1 insertion(+) rename filehandler.go => fitfilehandler.go (100%) create mode 100644 locationhandler.go diff --git a/filehandler.go b/fitfilehandler.go similarity index 100% rename from filehandler.go rename to fitfilehandler.go diff --git a/locationhandler.go b/locationhandler.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/locationhandler.go @@ -0,0 +1 @@ +package main From cb93ab988ab6b54ce0f21cf4f20dfd338181c082 Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Sun, 9 Aug 2020 17:14:09 +0200 Subject: [PATCH 2/9] Renamed files & add structs for location history data --- fitfilehandler.go => fit-file-handler.go | 3 -- location-history-handler.go | 37 ++++++++++++++++++++++++ locationhandler.go | 1 - main.go | 19 +++++++++--- 4 files changed, 52 insertions(+), 8 deletions(-) rename fitfilehandler.go => fit-file-handler.go (98%) create mode 100644 location-history-handler.go delete mode 100644 locationhandler.go diff --git a/fitfilehandler.go b/fit-file-handler.go similarity index 98% rename from fitfilehandler.go rename to fit-file-handler.go index ec437e6..d67be21 100644 --- a/fitfilehandler.go +++ b/fit-file-handler.go @@ -3,7 +3,6 @@ package main import ( "fmt" "go-file-processing-daemon/decode" - "os" "time" "github.com/bikedataproject/go-bike-data-lib/dbmodel" @@ -50,7 +49,6 @@ func HandleFitFile(file string) error { return fmt.Errorf("Could not create contribution: %v", err) } log.Infof("Added contribution for user %v", userID) - os.Remove(file) return nil } @@ -87,6 +85,5 @@ func HandleGpxFile(file string) error { return fmt.Errorf("Could not create contribution: %v", err) } log.Infof("Added contribution for user %v", user.ID) - os.Remove(file) return nil } diff --git 
a/location-history-handler.go b/location-history-handler.go new file mode 100644 index 0000000..a11fc69 --- /dev/null +++ b/location-history-handler.go @@ -0,0 +1,37 @@ +package main + +// PointActivity : Single activity information object +type PointActivity struct { + Type string `json:"type"` + Confidence int `json:"confidence"` +} + +// PointActivities : Collection of activities +type PointActivities struct { + TimestampMs string `json:"timestampMs"` + Activity []PointActivity `json:"activity"` +} + +// LocationHistoryPoint : Single location datapoint +type LocationHistoryPoint struct { + TimestampMs string `json:"timestampMs"` + LatitudeE7 float64 `json:"latitudeE7"` + LongitudeE7 float64 `json:"longitudeE7"` + Accuracy int `json:"accuracy"` + Activity []PointActivities `json:"activity,omitempty"` +} + +// LocationHistory : Collection of LocationHistoryPoints +type LocationHistory struct { + Locations []LocationHistoryPoint `json:"locations"` +} + +// ReadLocationFile : Parse a given JSON file and process it's contents +func HandleLocationFile(filepath string) error { + return nil +} + +// UnpackLocationFiles : Unzip a given .ZIP file's contents +func UnpackLocationFiles(filepath string) error { + return nil +} diff --git a/locationhandler.go b/locationhandler.go deleted file mode 100644 index 06ab7d0..0000000 --- a/locationhandler.go +++ /dev/null @@ -1 +0,0 @@ -package main diff --git a/main.go b/main.go index bdeb6bd..01e17f7 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "go-file-processing-daemon/config" "go-file-processing-daemon/crawl" "io/ioutil" @@ -27,14 +26,14 @@ func ReadSecret(file string) string { func main() { // Set filetypes - FileTypes := [2]string{"fit", "gpx"} + FileTypes := []string{"fit", "gpx", "zip"} // Set logging to file - logfile, err := os.OpenFile(fmt.Sprintf("log/%v.log", time.Now().Unix()), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) + /*logfile, err := os.OpenFile(fmt.Sprintf("log/%v.log", 
time.Now().Unix()), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf("Could not create logfile: %v", err) } - log.SetOutput(logfile) + log.SetOutput(logfile)*/ // Load configuration values conf := &config.Config{} @@ -91,10 +90,22 @@ func main() { log.Errorf("Something went wrong handling a GPX file: %v", err) } break + case "zip": + // Attempt to unzip the file + if err := UnpackLocationFiles(file); err != nil { + log.Errorf("Could not unzip %v: %v", file, err) + } else { + // Handle the ZIP file contents + if err := HandleLocationFile(file); err != nil { + log.Errorf("Could not handle location file: %v", err) + } + } + break default: log.Warnf("Trying to handle a file which is not in filetypes? (%v)", file) break } + os.Remove(file) } } } From b1f6a9025f806f5d56c6e41d34499ab58f0e1dce Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Tue, 11 Aug 2020 18:43:56 +0200 Subject: [PATCH 3/9] Prepare location file processing & unzipping --- .gitignore | 5 +- location-history-handler.go | 195 +++++++++++++++++++++++++++++++++++- main.go | 14 +-- 3 files changed, 202 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 855e8a9..b066d45 100644 --- a/.gitignore +++ b/.gitignore @@ -547,8 +547,7 @@ MigrationBackup/ # Ionide (cross platform F# VS Code tools) working folder .ionide/ -# Ignore .FIT & .GPX files -*.fit -*.gpx +# Ignore testing files +files/* # End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows,jetbrains+all,go,visualstudio,visualstudiocode diff --git a/location-history-handler.go b/location-history-handler.go index a11fc69..a58d1f3 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -1,5 +1,28 @@ package main +import ( + "archive/zip" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/bikedataproject/go-bike-data-lib/dbmodel" + log "github.com/sirupsen/logrus" +) + +const ( + // 
BikeCertaintyThreshold : Threshold to validate the activity confidence against + BikeCertaintyThreshold = 40 + // BikeType : Type of activity which matches bike riding + BikeType = "ON_BICYCLE" +) + // PointActivity : Single activity information object type PointActivity struct { Type string `json:"type"` @@ -26,12 +49,178 @@ type LocationHistory struct { Locations []LocationHistoryPoint `json:"locations"` } -// ReadLocationFile : Parse a given JSON file and process it's contents +// HandleLocationFile : Parse a given JSON file and process it's contents func HandleLocationFile(filepath string) error { + // Attempt to read the file + data, err := ioutil.ReadFile(filepath) + if err != nil { + return err + } + + // Unmarshal file + var history LocationHistory + if err = json.Unmarshal(data, &history); err != nil { + return err + } + + // Convert history to trip-based objects + trips := make(map[string][]LocationHistoryPoint) + + // Loop over each individual point & organise per day + for _, point := range history.Locations { + unixMs, err := strconv.ParseInt(point.TimestampMs, 10, 64) + if err != nil { + return err + } + timestamp := time.Unix(unixMs/1000, 0) + + // Loop over the activities for each point + for _, actCollection := range point.Activity { + for _, act := range actCollection.Activity { + if act.Type == BikeType && act.Confidence >= BikeCertaintyThreshold { + // Set trip + trips[timestamp.Format("2006-01-02")] = append(trips[timestamp.Format("2006-01-02")], point) + break + } + } + } + } + + // Clean return + log.Info(filepath) return nil } // UnpackLocationFiles : Unzip a given .ZIP file's contents -func UnpackLocationFiles(filepath string) error { - return nil +func UnpackLocationFiles(filepath string) (locationfiles []string, err error) { + // Unzip & get all filenames + files, err := unzip(filepath, fmt.Sprintf("")) + if err != nil { + log.Fatal(err) + } + + // Search for the location history files + for _, file := range files { + if 
strings.Contains(file, ".json") { + locationfiles = append(locationfiles, file) + } + } + return +} + +// tripsToContributions : Convert location history trips to bikedataproject Contributions +func tripsToContributions(trips map[string][]LocationHistoryPoint) (contributions []dbmodel.Contribution, err error) { + for _, trip := range trips { + tsStart, err := getStartTimestamp(trip) + if err != nil { + return contributions, err + } + tsStop, err := getEndTimestamp(trip) + if err != nil { + return contributions, err + } + + _ = dbmodel.Contribution{ + UserAgent: "web/LocationHistory", + TimeStampStart: time.Unix(tsStart, 0), + TimeStampStop: time.Unix(tsStop, 0), + Distance: 0, + Duration: 0, + } + } + + return +} + +func getStartTimestamp(points []LocationHistoryPoint) (timestamp int64, err error) { + // Set timestamp to now + timestamp = time.Now().Unix() + + // Loop over trip points + for _, p := range points { + // Get timestamp in milliseconds + unixMs, err := strconv.ParseInt(p.TimestampMs, 10, 64) + if err != nil { + return 0, err + } + // Convert to UNIX timestamp + unix := unixMs / 1000 + // Check if timestamp is earlier + if unix < timestamp { + timestamp = unix + } + } + return +} + +func getEndTimestamp(points []LocationHistoryPoint) (timestamp int64, err error) { + // Set timestamp to 1970 + timestamp = 0 + + // Loop over trip points + for _, p := range points { + // Get timestamp in milliseconds + unixMs, err := strconv.ParseInt(p.TimestampMs, 10, 64) + if err != nil { + return 0, err + } + // Convert to UNIX timestamp + unix := unixMs / 1000 + // Check if timestamp is earlier + if unix > timestamp { + timestamp = unix + } + } + return +} + +// unzip : unzip a given .zip file and return the filenames of the contents +func unzip(source string, destination string) (result []string, err error) { + var filenames []string + + reader, err := zip.OpenReader(source) + if err != nil { + return filenames, err + } + defer reader.Close() + + for _, f := range 
reader.File { + // Store filename/path for returning and using later on + path := filepath.Join(destination, f.Name) + + // Add filename to result + result = append(result, path) + + if f.FileInfo().IsDir() { + // Make Folder + os.MkdirAll(path, os.ModePerm) + continue + } + + // Copy file & contents + if err = os.MkdirAll(filepath.Dir(path), os.ModePerm); err != nil { + return result, err + } + + outFile, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) + if err != nil { + return result, err + } + + rc, err := f.Open() + if err != nil { + return result, err + } + + _, err = io.Copy(outFile, rc) + + // Close the file without defer to close before next iteration of loop + outFile.Close() + rc.Close() + + if err != nil { + return filenames, err + } + } + return } diff --git a/main.go b/main.go index 01e17f7..8938005 100644 --- a/main.go +++ b/main.go @@ -4,7 +4,6 @@ import ( "go-file-processing-daemon/config" "go-file-processing-daemon/crawl" "io/ioutil" - "os" "strconv" "time" @@ -92,12 +91,14 @@ func main() { break case "zip": // Attempt to unzip the file - if err := UnpackLocationFiles(file); err != nil { + if locationfiles, err := UnpackLocationFiles(file); err != nil { log.Errorf("Could not unzip %v: %v", file, err) } else { - // Handle the ZIP file contents - if err := HandleLocationFile(file); err != nil { - log.Errorf("Could not handle location file: %v", err) + // Handle the ZIP file contents which are .json files + for _, locationfile := range locationfiles { + if err := HandleLocationFile(locationfile); err != nil { + log.Errorf("Could not handle location file: %v", err) + } } } break @@ -105,7 +106,8 @@ func main() { log.Warnf("Trying to handle a file which is not in filetypes? 
(%v)", file) break } - os.Remove(file) + // TODO: uncomment line + // os.Remove(file) } } } From 87f75e2295fc6c2e04e181cd65fd54ed821a285e Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Tue, 11 Aug 2020 18:47:54 +0200 Subject: [PATCH 4/9] Add const variables to main --- location-history-handler.go | 9 +-------- main.go | 7 +++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/location-history-handler.go b/location-history-handler.go index a58d1f3..e8f1b97 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -16,13 +16,6 @@ import ( log "github.com/sirupsen/logrus" ) -const ( - // BikeCertaintyThreshold : Threshold to validate the activity confidence against - BikeCertaintyThreshold = 40 - // BikeType : Type of activity which matches bike riding - BikeType = "ON_BICYCLE" -) - // PointActivity : Single activity information object type PointActivity struct { Type string `json:"type"` @@ -77,7 +70,7 @@ func HandleLocationFile(filepath string) error { // Loop over the activities for each point for _, actCollection := range point.Activity { for _, act := range actCollection.Activity { - if act.Type == BikeType && act.Confidence >= BikeCertaintyThreshold { + if act.Type == LocationHistoryCylcingType && act.Confidence >= LocationHistoryActivityThreshold { // Set trip trips[timestamp.Format("2006-01-02")] = append(trips[timestamp.Format("2006-01-02")], point) break diff --git a/main.go b/main.go index 8938005..8bc01c0 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,13 @@ import ( var db dbmodel.Database +const ( + // LocationHistoryActivityThreshold : Threshold to validate the activity confidence against + LocationHistoryActivityThreshold = 40 + // LocationHistoryCylcingType : Type of activity which matches bike riding + LocationHistoryCylcingType = "ON_BICYCLE" +) + // ReadSecret : Read a file and return it's content as string - used for Docker secrets func ReadSecret(file string) string { data, err := ioutil.ReadFile(file) 
From 4d83eeeb9bfaaed6019e05095280957ccf58bcf1 Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Tue, 11 Aug 2020 19:35:14 +0200 Subject: [PATCH 5/9] Fix errorlogging and improve trip building --- location-history-handler.go | 49 ++++++++++++++++++++----------------- main.go | 6 ++--- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/location-history-handler.go b/location-history-handler.go index e8f1b97..5526e0d 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -13,7 +13,7 @@ import ( "time" "github.com/bikedataproject/go-bike-data-lib/dbmodel" - log "github.com/sirupsen/logrus" + "github.com/google/uuid" ) // PointActivity : Single activity information object @@ -53,43 +53,46 @@ func HandleLocationFile(filepath string) error { // Unmarshal file var history LocationHistory if err = json.Unmarshal(data, &history); err != nil { - return err + return fmt.Errorf("Could not unmarshall data into location history: %v", err) } - // Convert history to trip-based objects - trips := make(map[string][]LocationHistoryPoint) + if len(history.Locations) > 0 { + // Convert history to trip-based objects + trips := make(map[string][]LocationHistoryPoint) - // Loop over each individual point & organise per day - for _, point := range history.Locations { - unixMs, err := strconv.ParseInt(point.TimestampMs, 10, 64) - if err != nil { - return err - } - timestamp := time.Unix(unixMs/1000, 0) - - // Loop over the activities for each point - for _, actCollection := range point.Activity { - for _, act := range actCollection.Activity { - if act.Type == LocationHistoryCylcingType && act.Confidence >= LocationHistoryActivityThreshold { - // Set trip - trips[timestamp.Format("2006-01-02")] = append(trips[timestamp.Format("2006-01-02")], point) - break + // Loop over each individual point & organise per day + for _, point := range history.Locations { + unixMs, err := strconv.ParseInt(point.TimestampMs, 10, 64) + if err != nil { + return err + } + 
timestamp := time.Unix(unixMs/1000, 0) + + // Loop over the activities for each point + for _, actCollection := range point.Activity { + for _, act := range actCollection.Activity { + if act.Type == LocationHistoryCylcingType && act.Confidence >= LocationHistoryActivityThreshold { + // Set trip + trips[timestamp.Format("2006-01-02")] = append(trips[timestamp.Format("2006-01-02")], point) + break + } } } } + } else { + return fmt.Errorf("%v is not a location history file or is empty", filepath) } // Clean return - log.Info(filepath) return nil } // UnpackLocationFiles : Unzip a given .ZIP file's contents -func UnpackLocationFiles(filepath string) (locationfiles []string, err error) { +func UnpackLocationFiles(filepath string, extractPath string) (locationfiles []string, err error) { // Unzip & get all filenames - files, err := unzip(filepath, fmt.Sprintf("")) + files, err := unzip(filepath, fmt.Sprintf("%v/%v", extractPath, uuid.New())) if err != nil { - log.Fatal(err) + return } // Search for the location history files diff --git a/main.go b/main.go index 8bc01c0..f0e8709 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "go-file-processing-daemon/config" "go-file-processing-daemon/crawl" "io/ioutil" + "os" "strconv" "time" @@ -98,7 +99,7 @@ func main() { break case "zip": // Attempt to unzip the file - if locationfiles, err := UnpackLocationFiles(file); err != nil { + if locationfiles, err := UnpackLocationFiles(file, conf.FileDir); err != nil { log.Errorf("Could not unzip %v: %v", file, err) } else { // Handle the ZIP file contents which are .json files @@ -113,8 +114,7 @@ func main() { log.Warnf("Trying to handle a file which is not in filetypes? 
(%v)", file) break } - // TODO: uncomment line - // os.Remove(file) + os.Remove(file) } } } From 467077f5989144e9c983fa95b63acecd0724dd75 Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Thu, 13 Aug 2020 12:17:49 +0200 Subject: [PATCH 6/9] Convert location history to contribution --- location-history-handler.go | 110 ++++++++++++++++++++++-------------- main.go | 2 + 2 files changed, 71 insertions(+), 41 deletions(-) diff --git a/location-history-handler.go b/location-history-handler.go index 5526e0d..8f1b98e 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -14,6 +14,8 @@ import ( "github.com/bikedataproject/go-bike-data-lib/dbmodel" "github.com/google/uuid" + geo "github.com/paulmach/go.geo" + log "github.com/sirupsen/logrus" ) // PointActivity : Single activity information object @@ -79,6 +81,14 @@ func HandleLocationFile(filepath string) error { } } } + + // Convert map to Contributions + contributions, err := tripsToContributions(trips) + if err != nil { + return err + } + log.Info(contributions) + } else { return fmt.Errorf("%v is not a location history file or is empty", filepath) } @@ -107,69 +117,87 @@ func UnpackLocationFiles(filepath string, extractPath string) (locationfiles []s // tripsToContributions : Convert location history trips to bikedataproject Contributions func tripsToContributions(trips map[string][]LocationHistoryPoint) (contributions []dbmodel.Contribution, err error) { for _, trip := range trips { - tsStart, err := getStartTimestamp(trip) - if err != nil { - return contributions, err - } - tsStop, err := getEndTimestamp(trip) - if err != nil { - return contributions, err - } + // Check if trip contains more points then the threshold + if len(trip) >= LocationHistoryPointThreshold { + // Create geopath from points + geoPath := geo.NewPath() + var timestamps []time.Time + + for _, point := range trip { + // Add geopoint to path + geoPath.Push(geo.NewPoint(point.LongitudeE7/1e7, point.LatitudeE7/1e7)) + + 
// Get point timestamp + unixMs, err := strconv.ParseInt(point.TimestampMs, 10, 64) + if err != nil { + return contributions, err + } + // Convert to UNIX timestamp + ts := time.Unix(unixMs/1000, 0) + timestamps = append(timestamps, ts) + } - _ = dbmodel.Contribution{ - UserAgent: "web/LocationHistory", - TimeStampStart: time.Unix(tsStart, 0), - TimeStampStop: time.Unix(tsStop, 0), - Distance: 0, - Duration: 0, + // Create contribution + contrib := dbmodel.Contribution{ + UserAgent: "web/LocationHistory", + TimeStampStart: getStartTimestamp(trip), + TimeStampStop: getEndTimestamp(trip), + Distance: int(geoPath.GeoDistance()), + Duration: int(getEndTimestamp(trip).Sub(getStartTimestamp(trip)).Seconds()), + PointsGeom: geoPath, + PointsTime: timestamps, + } + + // Add contribution to array + contributions = append(contributions, contrib) } } - return } -func getStartTimestamp(points []LocationHistoryPoint) (timestamp int64, err error) { +// getStartTimestamp : get the lowest timestamp of an array of LocationHistoryPoints +func getStartTimestamp(points []LocationHistoryPoint) (timestamp time.Time) { // Set timestamp to now - timestamp = time.Now().Unix() + timestamp = time.Now() // Loop over trip points for _, p := range points { - // Get timestamp in milliseconds - unixMs, err := strconv.ParseInt(p.TimestampMs, 10, 64) - if err != nil { - return 0, err - } - // Convert to UNIX timestamp - unix := unixMs / 1000 - // Check if timestamp is earlier - if unix < timestamp { - timestamp = unix + if tmpTimestamp, err := getTimestamp(p); err == nil { + // Check if timestamp is earlier + if diff := timestamp.Sub(tmpTimestamp); diff > 0 { + timestamp = tmpTimestamp + } } } return } -func getEndTimestamp(points []LocationHistoryPoint) (timestamp int64, err error) { - // Set timestamp to 1970 - timestamp = 0 - +// getStartTimestamp : get the highest timestamp of an array of LocationHistoryPoints +func getEndTimestamp(points []LocationHistoryPoint) (timestamp time.Time) { // Loop 
over trip points for _, p := range points { - // Get timestamp in milliseconds - unixMs, err := strconv.ParseInt(p.TimestampMs, 10, 64) - if err != nil { - return 0, err - } - // Convert to UNIX timestamp - unix := unixMs / 1000 - // Check if timestamp is earlier - if unix > timestamp { - timestamp = unix + if tmpTimestamp, err := getTimestamp(p); err == nil { + // Check if timestamp is earlier + if diff := timestamp.Sub(tmpTimestamp); diff < 0 { + timestamp = tmpTimestamp + } } } return } +// getTimestamp : Get the timestamp of a single LocationHistoryPoint +func getTimestamp(point LocationHistoryPoint) (timestamp time.Time, err error) { + unixMs, err := strconv.ParseInt(point.TimestampMs, 10, 64) + if err != nil { + return + } + + // Convert to UNIX timestamp + timestamp = time.Unix(unixMs/1000, 0) + return +} + // unzip : unzip a given .zip file and return the filenames of the contents func unzip(source string, destination string) (result []string, err error) { var filenames []string diff --git a/main.go b/main.go index f0e8709..3b2c6af 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,8 @@ const ( LocationHistoryActivityThreshold = 40 // LocationHistoryCylcingType : Type of activity which matches bike riding LocationHistoryCylcingType = "ON_BICYCLE" + // LocationHistoryPointThreshold : Threshold of minimal data points + LocationHistoryPointThreshold = 20 ) // ReadSecret : Read a file and return it's content as string - used for Docker secrets From 4cef104c4461daf7244e8c61fed27a4eb11bcd67 Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Thu, 13 Aug 2020 21:41:43 +0200 Subject: [PATCH 7/9] Updated dbmodel, creating + uploading contributions --- go.mod | 2 +- go.sum | 5 +++-- location-history-handler.go | 18 ++++++++++++++---- main.go | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index fca5f74..d8ca948 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module go-file-processing-daemon go 1.13 require ( - 
github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200727162450-a47d3b297b9b + github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c github.com/fatih/camelcase v1.0.0 // indirect github.com/fatih/structs v1.1.0 // indirect github.com/google/uuid v1.1.1 diff --git a/go.sum b/go.sum index 3c8541a..74fb922 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI= github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= -github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200727162450-a47d3b297b9b h1:g+zqEaYpgJKUBd2fhtzcGi8PqOqnhA7m2oibnWDkhSg= -github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200727162450-a47d3b297b9b/go.mod h1:puaYhkBYtfO+uSfgHater2N6t4BAeGnNqmGs0G1rifM= +github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c h1:43jHCoAqjXn/iAU6piEy++6HF3hT7juI60v+q8f6DXg= +github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c/go.mod h1:puaYhkBYtfO+uSfgHater2N6t4BAeGnNqmGs0G1rifM= github.com/bradfitz/latlong v0.0.0-20170410180902-f3db6d0dff40/go.mod h1:ZcXX9BndVQx6Q/JM6B8x7dLE9sl20S+TQsv4KO7tEQk= github.com/cespare/xxhash v1.0.0 h1:naDmySfoNg0nKS62/ujM6e71ZgM2AoVdaqGwMG0w18A= github.com/cespare/xxhash v1.0.0/go.mod h1:fX/lfQBkSCDXZSUgv6jVIu/EVA3/JNseAX5asI4c4T4= @@ -61,6 +61,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/image v0.0.0-20190501045829-6d32002ffd75/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/lint v0.0.0-20190409202823-959b441ac422 h1:QzoH/1pFpZguR8NrRHLcO6jKqfv2zpuSqZLgdm7ZmjI= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 
+golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/location-history-handler.go b/location-history-handler.go index 8f1b98e..f0d94a3 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -87,7 +87,16 @@ func HandleLocationFile(filepath string) error { if err != nil { return err } - log.Info(contributions) + + // Upload data to database + for _, contribution := range contributions { + user, _ := db.GetUserData("63251108") + if err := db.AddContribution(&contribution, &user); err != nil { + log.Warnf("Could not add contribution to database: %v", err) + } else { + log.Info("Add location history trip to database") + } + } } else { return fmt.Errorf("%v is not a location history file or is empty", filepath) @@ -98,9 +107,10 @@ func HandleLocationFile(filepath string) error { } // UnpackLocationFiles : Unzip a given .ZIP file's contents -func UnpackLocationFiles(filepath string, extractPath string) (locationfiles []string, err error) { +func UnpackLocationFiles(filepath string, extractPath string) (locationfiles []string, foldername string, err error) { // Unzip & get all filenames - files, err := unzip(filepath, fmt.Sprintf("%v/%v", extractPath, uuid.New())) + foldername = fmt.Sprintf("%v/%v", extractPath, uuid.New()) + files, err := unzip(filepath, foldername) if err != nil { return } @@ -143,7 +153,7 @@ func tripsToContributions(trips map[string][]LocationHistoryPoint) (contribution TimeStampStart: getStartTimestamp(trip), TimeStampStop: getEndTimestamp(trip), Distance: int(geoPath.GeoDistance()), - Duration: 
int(getEndTimestamp(trip).Sub(getStartTimestamp(trip)).Seconds()), + Duration: int(getEndTimestamp(trip).Sub(getEndTimestamp(trip)).Seconds()), PointsGeom: geoPath, PointsTime: timestamps, } diff --git a/main.go b/main.go index 3b2c6af..10c9434 100644 --- a/main.go +++ b/main.go @@ -101,13 +101,13 @@ func main() { break case "zip": // Attempt to unzip the file - if locationfiles, err := UnpackLocationFiles(file, conf.FileDir); err != nil { + if locationfiles, _, err := UnpackLocationFiles(file, conf.FileDir); err != nil { log.Errorf("Could not unzip %v: %v", file, err) } else { // Handle the ZIP file contents which are .json files for _, locationfile := range locationfiles { if err := HandleLocationFile(locationfile); err != nil { - log.Errorf("Could not handle location file: %v", err) + log.Warnf("Could not handle location file: %v", err) } } } From 355a518c5edd5ffc70dbe361d4e8187276e0dd6b Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Wed, 19 Aug 2020 17:56:27 +0200 Subject: [PATCH 8/9] Add fetching provideruser data from locationhistory --- decode/decoding.go | 53 +++++++++++++++++++++++++++++++++++++ go.mod | 1 + go.sum | 7 +++++ location-history-handler.go | 42 ++++++++++++++++++++++++++--- main.go | 36 ++++++++++++++++++++++++- 5 files changed, 135 insertions(+), 4 deletions(-) diff --git a/decode/decoding.go b/decode/decoding.go index 0daa1a7..f0e0225 100644 --- a/decode/decoding.go +++ b/decode/decoding.go @@ -1,15 +1,21 @@ package decode import ( + "bufio" "bytes" + "crypto/sha512" + "encoding/hex" "fmt" "io/ioutil" "math" "os" "strconv" + "strings" "time" + "github.com/PuerkitoBio/goquery" "github.com/bikedataproject/go-bike-data-lib/dbmodel" + "github.com/google/uuid" geo "github.com/paulmach/go.geo" "github.com/tkrajina/gpxgo/gpx" "github.com/tormoder/fit" @@ -154,3 +160,50 @@ func GpxToContribution(filedir string) (contrib dbmodel.Contribution, err error) return } + +// GetUserFromHTML : extract a user object from an HTML-file +func 
GetUserFromHTML(filepath string, usr *dbmodel.User) (err error) { + // Set global user data + usr.UserIdentifier = uuid.New().String() + usr.ExpiresAt = -1 + usr.ExpiresIn = -1 + usr.IsHistoryFetched = true + usr.Provider = "web/LocationHistory" + usr.TokenCreationDate = time.Now() + usr.AccessToken = "0" + usr.RefreshToken = "0" + + // Open HTML file + file, err := os.Open(filepath) + if err != nil { + return + } + + // Create a buffer reader from the file + reader := bufio.NewReader(file) + + // Create goquery documentreader + doc, err := goquery.NewDocumentFromReader(reader) + if err != nil { + return + } + + // Find e-mail address in document + // Find the header element first + doc.Find(".header_title").Each(func(i int, s *goquery.Selection) { + // Split the value of this element by spaces + pageTitle := strings.Split(s.Text(), " ") + // Loop over each word + for _, word := range pageTitle { + // Find the e-mail address + if strings.Contains(word, "@") { + // Hash the e-mail + hasher := sha512.New() + hasher.Write([]byte(word)) + usr.ProviderUser = hex.EncodeToString(hasher.Sum(nil)) + } + } + }) + + return +} diff --git a/go.mod b/go.mod index d8ca948..f0e789e 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module go-file-processing-daemon go 1.13 require ( + github.com/PuerkitoBio/goquery v1.5.1 github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c github.com/fatih/camelcase v1.0.0 // indirect github.com/fatih/structs v1.1.0 // indirect diff --git a/go.sum b/go.sum index 74fb922..451a5a8 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,10 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI= github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= +github.com/PuerkitoBio/goquery v1.5.1 
h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c h1:43jHCoAqjXn/iAU6piEy++6HF3hT7juI60v+q8f6DXg= github.com/bikedataproject/go-bike-data-lib/dbmodel v0.0.0-20200728150720-09b74d41943c/go.mod h1:puaYhkBYtfO+uSfgHater2N6t4BAeGnNqmGs0G1rifM= github.com/bradfitz/latlong v0.0.0-20170410180902-f3db6d0dff40/go.mod h1:ZcXX9BndVQx6Q/JM6B8x7dLE9sl20S+TQsv4KO7tEQk= @@ -61,8 +65,11 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/image v0.0.0-20190501045829-6d32002ffd75/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/lint v0.0.0-20190409202823-959b441ac422 h1:QzoH/1pFpZguR8NrRHLcO6jKqfv2zpuSqZLgdm7ZmjI= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= diff --git 
a/location-history-handler.go b/location-history-handler.go index f0d94a3..c45cb61 100644 --- a/location-history-handler.go +++ b/location-history-handler.go @@ -2,6 +2,9 @@ package main import ( "archive/zip" + "bufio" + "crypto/sha1" + "encoding/hex" "encoding/json" "fmt" "io" @@ -12,6 +15,7 @@ import ( "strings" "time" + "github.com/PuerkitoBio/goquery" "github.com/bikedataproject/go-bike-data-lib/dbmodel" "github.com/google/uuid" geo "github.com/paulmach/go.geo" @@ -45,7 +49,7 @@ type LocationHistory struct { } // HandleLocationFile : Parse a given JSON file and process it's contents -func HandleLocationFile(filepath string) error { +func HandleLocationFile(filepath string, user dbmodel.User) error { // Attempt to read the file data, err := ioutil.ReadFile(filepath) if err != nil { @@ -90,7 +94,6 @@ func HandleLocationFile(filepath string) error { // Upload data to database for _, contribution := range contributions { - user, _ := db.GetUserData("63251108") if err := db.AddContribution(&contribution, &user); err != nil { log.Warnf("Could not add contribution to database: %v", err) } else { @@ -117,7 +120,7 @@ func UnpackLocationFiles(filepath string, extractPath string) (locationfiles []s // Search for the location history files for _, file := range files { - if strings.Contains(file, ".json") { + if strings.Contains(file, ".json") || strings.Contains(file, ".html") { locationfiles = append(locationfiles, file) } } @@ -208,6 +211,39 @@ func getTimestamp(point LocationHistoryPoint) (timestamp time.Time, err error) { return } +// getProviderUser : Read HTML-file to fetch provider user +func getProviderUser(filepath string) (id string, err error) { + // Read file + file, err := os.Open(filepath) + if err != nil { + return + } + + // Convert to bytesreader + reader := bufio.NewReader(file) + + // Convert to Goquery object + doc, err := goquery.NewDocumentFromReader(reader) + + // Find page title & Loop over results - should be just 1 + 
doc.Find(".header_title").Each(func(i int, s *goquery.Selection) { + // Split sentence in words + pageTitle := strings.Split(s.Text(), " ") + for _, word := range pageTitle { + // Extract e-mail address + if strings.Contains(word, "@") { + // Hash with SHA1 + hasher := sha1.New() + hasher.Write([]byte(word)) + id = hex.EncodeToString(hasher.Sum(nil)) + break + } + } + }) + + return +} + // unzip : unzip a given .zip file and return the filenames of the contents func unzip(source string, destination string) (result []string, err error) { var filenames []string diff --git a/main.go b/main.go index 10c9434..95d1c95 100644 --- a/main.go +++ b/main.go @@ -3,9 +3,11 @@ package main import ( "go-file-processing-daemon/config" "go-file-processing-daemon/crawl" + "go-file-processing-daemon/decode" "io/ioutil" "os" "strconv" + "strings" "time" "github.com/bikedataproject/go-bike-data-lib/dbmodel" @@ -100,13 +102,45 @@ func main() { } break case "zip": + // Generate user object + var user dbmodel.User + // Attempt to unzip the file if locationfiles, _, err := UnpackLocationFiles(file, conf.FileDir); err != nil { log.Errorf("Could not unzip %v: %v", file, err) } else { + // Search the HTML-file to build a user account + for _, file := range locationfiles { + if strings.Contains(file, ".html") { + err = decode.GetUserFromHTML(file, &user) + if err != nil { + // Make blank user object + log.Errorf("Could not extract user from HTML file: %v", err) + } else { + // Check if user exists + userTmp, err := db.GetUserData(user.ProviderUser) + if err != nil { + log.Infof("Could not fetch user data: %v", err) + } + // Check if userdata is empty + if userTmp.ID == "" { + // Add user to database + user, err = db.AddUser(&user) + if err != nil { + log.Errorf("Could not add new user to database: %v", err) + } else { + log.Info("Created new user from HTML file") + } + } else { + user = userTmp + } + } + } + } + // Handle the ZIP file contents which are .json files for _, locationfile := range 
locationfiles { - if err := HandleLocationFile(locationfile); err != nil { + if err := HandleLocationFile(locationfile, user); err != nil { log.Warnf("Could not handle location file: %v", err) } } From 2cdf6a854877a96a378453b1888ba7932b64e74e Mon Sep 17 00:00:00 2001 From: Guillaume Meurillon Date: Fri, 21 Aug 2020 14:42:33 +0200 Subject: [PATCH 9/9] Exit loop if data has been found --- decode/decoding.go | 1 + 1 file changed, 1 insertion(+) diff --git a/decode/decoding.go b/decode/decoding.go index f0e0225..f76ec21 100644 --- a/decode/decoding.go +++ b/decode/decoding.go @@ -201,6 +201,7 @@ func GetUserFromHTML(filepath string, usr *dbmodel.User) (err error) { hasher := sha512.New() hasher.Write([]byte(word)) usr.ProviderUser = hex.EncodeToString(hasher.Sum(nil)) + break } } })