From 229ad69c77f13700163b2904051e448b443e61ae Mon Sep 17 00:00:00 2001 From: pourya Date: Thu, 3 Feb 2022 04:31:04 +0330 Subject: [PATCH 1/3] Add ExcludeExtension and IncludeExtension to filter objects/files based on their extension. --- README.md | 6 ++++++ s4cmd.py | 23 ++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3b45071..ca2d7e7 100644 --- a/README.md +++ b/README.md @@ -318,6 +318,9 @@ Specifies the customer-provided encryption key for Amazon S3 to use to decrypt t ##### `--API-ETag=[string]` Entity tag returned when the part was uploaded. +##### `--API-ExcludeExtension=[string]` +Includes all objects except objects with a specified extension. + ##### `--API-Expires=[datetime]` The date and time at which the object is no longer cacheable. @@ -345,6 +348,9 @@ Return the object only if its entity tag (ETag) is different from the one specif ##### `--API-IfUnmodifiedSince=[datetime]` Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed). +##### `--API-IncludeExtension=[string]` +Includes objects with a specified extension. + ##### `--API-Metadata=[dict]` A map (in json string) of metadata to store with the object in S3 diff --git a/s4cmd.py b/s4cmd.py index bcdf982..e309ec3 100755 --- a/s4cmd.py +++ b/s4cmd.py @@ -328,6 +328,8 @@ class BotoClient(object): "Specifies the customer-provided encryption key for Amazon S3 to use to decrypt the source object. 
The encryption key provided in this header must be one that was used when the source object was created."), ("ETag", "string", "Entity tag returned when the part was uploaded."), + ("ExcludeExtension", "string", + "Includes all objects except objects with a specified extension."), ("Expires", "datetime", "The date and time at which the object is no longer cacheable."), ("GrantFullControl", "string", @@ -346,6 +348,8 @@ class BotoClient(object): "Return the object only if its entity tag (ETag) is different from the one specified, otherwise return a 304 (not modified)."), ("IfUnmodifiedSince", "datetime", "Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed)."), + ("IncludeExtension", "string", + "Includes objects with a specified extension."), ("Metadata", "dict", "A map (in json string) of metadata to store with the object in S3"), ("MetadataDirective", "string", @@ -783,6 +787,13 @@ def source_expand(self, source): return result + def extension_check(self, file): + if self.opt.ExcludeExtension is not None and file.endswith(self.opt.ExcludeExtension): + return True + if self.opt.IncludeExtension is not None and not file.endswith(self.opt.IncludeExtension): + return True + return False + @log_calls def put_single_file(self, pool, source, target): '''Upload a single file or a directory by adding a task into queue''' @@ -1309,6 +1320,8 @@ def read_file_chunk(self, source, pos, chunk): @log_calls def upload(self, source, target, mpi=None, pos=0, chunk=0, part=0): '''Thread worker for upload operation.''' + if extension_check(source): + return s3url = S3URL(target) obj = self.lookup(s3url) @@ -1385,6 +1398,8 @@ def write_file_chunk(self, target, pos, chunk, body): @log_calls def download(self, source, target, mpi=None, pos=0, chunk=0, part=0): '''Thread worker for download operation.''' + if extension_check(source): + return s3url = S3URL(source) obj = self.lookup(s3url) if obj is None: @@ -1444,7 
+1459,8 @@ def download(self, source, target, mpi=None, pos=0, chunk=0, part=0): @log_calls def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=False): '''Copy a single file from source to target using boto S3 library.''' - + if extension_check(source): + return if self.opt.dry_run: message('%s => %s' % (source, target)) return @@ -1499,6 +1515,9 @@ def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=F @log_calls def delete(self, source): '''Thread worker for download operation.''' + if extension_check(source): + return + s3url = S3URL(source) message('Delete %s', source) @@ -1521,6 +1540,8 @@ def batch_delete(self, sources): bucket = S3URL(sources[0]).bucket deletes = [] for source in sources: + if extension_check(source): + continue s3url = S3URL(source) if s3url.bucket != bucket: raise Failure('Unable to delete keys in different bucket %s and %s.' % (s3url.bucket, bucket)) From 66be70f7d3234f1eccb43afbb241db5f6310dcdb Mon Sep 17 00:00:00 2001 From: pourya Date: Thu, 3 Feb 2022 04:35:15 +0330 Subject: [PATCH 2/3] Add ExcludeExtension and IncludeExtension to filter objects/files based on their extension. --- README.md | 1 + s4cmd.py | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index ca2d7e7..5b1c4cb 100644 --- a/README.md +++ b/README.md @@ -471,3 +471,4 @@ still have to download and verify the MD5 directly. 
* Bloomreach http://www.bloomreach.com * Onera http://www.onera.com + diff --git a/s4cmd.py b/s4cmd.py index e309ec3..37df125 100755 --- a/s4cmd.py +++ b/s4cmd.py @@ -794,6 +794,7 @@ def extension_check(self, file): return True return False + @log_calls def put_single_file(self, pool, source, target): '''Upload a single file or a directory by adding a task into queue''' From b300f1f96d15db9b9f686c36dffa7420ae85d5e1 Mon Sep 17 00:00:00 2001 From: pourya Date: Thu, 3 Feb 2022 17:42:45 +0330 Subject: [PATCH 3/3] Fix check extension error --- s4cmd.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/s4cmd.py b/s4cmd.py index 37df125..0bf60e0 100755 --- a/s4cmd.py +++ b/s4cmd.py @@ -787,12 +787,7 @@ def source_expand(self, source): return result - def extension_check(self, file): - if self.opt.ExcludeExtension is not None and file.endswith(self.opt.ExcludeExtension): - return True - if self.opt.IncludeExtension is not None and not file.endswith(self.opt.IncludeExtension): - return True - return False + @log_calls @@ -1091,6 +1086,7 @@ def get_md5(self): self.md5 = self.file_hash(self.filename) return self.md5 + class ThreadUtil(S3Handler, ThreadPool.Worker): '''Thread workers for S3 operations. This class contains all thread workers for S3 operations. @@ -1234,6 +1230,14 @@ def conditional(self, result, obj): result.append(obj) + def extension_check(self, file): + ''' check files extension which is included or excluded ''' + if self.opt.ExcludeExtension is not None and file.endswith(self.opt.ExcludeExtension): + return True + if self.opt.IncludeExtension is not None and not file.endswith(self.opt.IncludeExtension): + return True + return False + class MultipartItem: '''Utility class for multiple part upload/download. 
This class is used to keep track of a single upload/download file, so @@ -1321,7 +1325,7 @@ def read_file_chunk(self, source, pos, chunk): @log_calls def upload(self, source, target, mpi=None, pos=0, chunk=0, part=0): '''Thread worker for upload operation.''' - if extension_check(source): + if self.extension_check(source): return s3url = S3URL(target) obj = self.lookup(s3url) @@ -1399,7 +1403,7 @@ def write_file_chunk(self, target, pos, chunk, body): @log_calls def download(self, source, target, mpi=None, pos=0, chunk=0, part=0): '''Thread worker for download operation.''' - if extension_check(source): + if self.extension_check(source): return s3url = S3URL(source) obj = self.lookup(s3url) @@ -1460,7 +1464,7 @@ def download(self, source, target, mpi=None, pos=0, chunk=0, part=0): @log_calls def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=False): '''Copy a single file from source to target using boto S3 library.''' - if extension_check(source): + if self.extension_check(source): return if self.opt.dry_run: message('%s => %s' % (source, target)) @@ -1516,7 +1520,7 @@ def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=F @log_calls def delete(self, source): '''Thread worker for download operation.''' - if extension_check(source): + if self.extension_check(source): return s3url = S3URL(source) @@ -1541,7 +1545,7 @@ def batch_delete(self, sources): bucket = S3URL(sources[0]).bucket deletes = [] for source in sources: - if extension_check(source): + if self.extension_check(source): continue s3url = S3URL(source) if s3url.bucket != bucket: