From 63256fcf3d76c2481399dc0d872da7c0d263bc8b Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Thu, 4 Jul 2024 21:32:11 +0100 Subject: [PATCH 1/7] Using video_duration to cap FramesExtractor.detect_crop at the end of the video, saving time. --- .gitignore | 2 ++ tests/test_framesextractor.py | 13 +++++++++---- videohash2/framesextractor.py | 15 ++++++++++++++- videohash2/videohash.py | 8 ++++++-- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index f9b2269..7ba7ac0 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ coverage.xml .hypothesis/ .pytest_cache/ + # Translations *.mo *.pot @@ -146,6 +147,7 @@ dmypy.json .idea/**/usage.statistics.xml .idea/**/dictionaries .idea/**/shelf +test.py # AWS User-specific .idea/**/aws.xml diff --git a/tests/test_framesextractor.py b/tests/test_framesextractor.py index 8203e19..091ab76 100644 --- a/tests/test_framesextractor.py +++ b/tests/test_framesextractor.py @@ -12,22 +12,27 @@ def test_all(): video_path = os.path.join(script_path, os.path.pardir, "assets", "rocket.mkv") output_dir = create_and_return_temporary_directory() + video_length = 52.3 interval = 1 ffmpeg_path = None - FramesExtractor(video_path, output_dir, interval=interval, ffmpeg_path=ffmpeg_path) + FramesExtractor(video_path, output_dir, video_length, + interval=interval, ffmpeg_path=ffmpeg_path) with pytest.raises(FileNotFoundError): video_path = os.path.join(script_path, "thisvideodoesnotexist.mp4") output_dir = create_and_return_temporary_directory() - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=None) + FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=None) with pytest.raises(FFmpegNotFound): video_path = os.path.join(script_path, os.path.pardir, "assets", "rocket.mkv") output_dir = create_and_return_temporary_directory() ffmpeg_path = os.path.join(output_dir, "ffmpeg") - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=ffmpeg_path) + 
FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=ffmpeg_path) with pytest.raises(FramesExtractorOutPutDirDoesNotExist): video_path = os.path.join(script_path, "../assets/rocket.mkv") output_dir = os.path.join(script_path, "thisdirdoesnotexist/") - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=None) + FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=None) diff --git a/videohash2/framesextractor.py b/videohash2/framesextractor.py index e8e29c2..706a4ca 100644 --- a/videohash2/framesextractor.py +++ b/videohash2/framesextractor.py @@ -1,5 +1,6 @@ import os import re +import math import shlex from shutil import which from subprocess import PIPE, Popen, check_output @@ -26,6 +27,7 @@ def __init__( self, video_path: str, output_dir: str, + video_length: float, interval: Union[int, float] = 1, ffmpeg_path: Optional[str] = None, ) -> None: @@ -53,6 +55,7 @@ def __init__( """ self.video_path = video_path self.output_dir = output_dir + self.video_length = video_length self.interval = interval self.ffmpeg_path = "" if ffmpeg_path: @@ -114,6 +117,7 @@ def detect_crop( video_path: Optional[str] = None, frames: int = 3, ffmpeg_path: Optional[str] = None, + video_length: float = 2 ) -> str: """ Detects the the amount of cropping to remove black bars. @@ -144,11 +148,18 @@ def detect_crop( 7200, 14400, ] + crop_list = [] for start_time in time_start_list: + # Stopping the loop if we go beyond the end length of the video. + # We round the video length up to make sure we do get the whole + # video. 
+ if start_time > math.ceil(video_length): + break + command = f'"{ffmpeg_path}" -ss {start_time} -i "{video_path}" -vframes {frames} -vf cropdetect -f null -' process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE) @@ -185,6 +196,7 @@ def extract(self) -> None: ffmpeg_path = self.ffmpeg_path video_path = self.video_path + video_length = self.video_length output_dir = self.output_dir if os.name == "posix": @@ -193,7 +205,8 @@ def extract(self) -> None: output_dir = shlex.quote(self.output_dir) crop = FramesExtractor.detect_crop( - video_path=video_path, frames=3, ffmpeg_path=ffmpeg_path + video_path=video_path, frames=3, ffmpeg_path=ffmpeg_path, + video_length=video_length ) command = ( diff --git a/videohash2/videohash.py b/videohash2/videohash.py index 2f3f239..47c07b5 100644 --- a/videohash2/videohash.py +++ b/videohash2/videohash.py @@ -82,7 +82,11 @@ def __init__( self._copy_video_to_video_dir() - FramesExtractor(self.video_path, self.frames_dir, interval=self.frame_interval) + self.video_duration = video_duration(self.video_path) + + FramesExtractor(self.video_path, self.frames_dir, + video_length=self.video_duration, + interval=self.frame_interval) self.collage_path = os.path.join(self.collage_dir, "collage.jpg") @@ -104,7 +108,6 @@ def __init__( self.image = Image.open(self.collage_path) self.bits_in_hash = 64 self.similar_percentage = 15 - self.video_duration = video_duration(self.video_path) self._calc_hash() @@ -675,3 +678,4 @@ def _calc_hash(self) -> None: # the binary value is prefixed with 0b. self.hash = f"0b{self.hash}" self.hash_hex: str = VideoHash.bin2hex(self.hash) + From fabe55e50c031eaec663f45c185bafc6eee3769d Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Thu, 4 Jul 2024 23:47:11 +0100 Subject: [PATCH 2/7] Added do_not_copy option, also improving speed. 
--- videohash2/utils.py | 9 +++------ videohash2/videohash.py | 22 ++++++++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/videohash2/utils.py b/videohash2/utils.py index af060d2..e987de4 100644 --- a/videohash2/utils.py +++ b/videohash2/utils.py @@ -21,21 +21,18 @@ def does_path_exists(path: str) -> bool: If a directory is supplied then check if it exists. If a file is supplied then check if it exists. - Directory ends with "/" on posix or "\" in windows and files do not. - If directory/file exists returns True else returns False :return: True if dir or file exists else False. :rtype: bool """ - if path.endswith("/") or path.endswith("\\"): - # it's directory - return os.path.isdir(path) + if os.path.isdir(path) or os.path.isfile(path): + return os.path.exists(path) else: # it's file - return os.path.isfile(path) + return False def create_and_return_temporary_directory() -> str: diff --git a/videohash2/videohash.py b/videohash2/videohash.py index 47c07b5..d38904d 100644 --- a/videohash2/videohash.py +++ b/videohash2/videohash.py @@ -37,6 +37,7 @@ def __init__( storage_path: Optional[str] = None, download_worst: bool = False, frame_interval: Union[int, float] = 1, + do_not_copy: bool = True, ) -> None: """ :param path: Absolute path of the input video file. 
@@ -74,6 +75,7 @@ def __init__( self._storage_path = self.storage_path self.download_worst = download_worst + self.do_not_copy = do_not_copy self.frame_interval = frame_interval self.task_uid = VideoHash._get_task_uid() @@ -84,9 +86,12 @@ def __init__( self.video_duration = video_duration(self.video_path) - FramesExtractor(self.video_path, self.frames_dir, - video_length=self.video_duration, - interval=self.frame_interval) + FramesExtractor( + self.video_path, + self.frames_dir, + video_length=self.video_duration, + interval=self.frame_interval, + ) self.collage_path = os.path.join(self.collage_dir, "collage.jpg") @@ -292,7 +297,10 @@ def _copy_video_to_video_dir(self) -> None: self.video_path = os.path.join(self.video_dir, (f"video.{extension}")) - shutil.copyfile(self.path, self.video_path) + if self.do_not_copy: + os.symlink(self.path, self.video_path) + else: + shutil.copyfile(self.path, self.video_path) if self.url: @@ -312,7 +320,10 @@ def _copy_video_to_video_dir(self) -> None: self.video_path = f"{self.video_dir}video.{extension}" - shutil.copyfile(downloaded_file, self.video_path) + if self.do_not_copy: + os.symlink(downloaded_file, self.video_path) + else: + shutil.copyfile(downloaded_file, self.video_path) def _create_required_dirs_and_check_for_errors(self) -> None: """ @@ -678,4 +689,3 @@ def _calc_hash(self) -> None: # the binary value is prefixed with 0b. self.hash = f"0b{self.hash}" self.hash_hex: str = VideoHash.bin2hex(self.hash) - From 5f0f988d66ffc6a6fb12fadcc9cf6e373fa18b2e Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Thu, 4 Jul 2024 23:56:23 +0100 Subject: [PATCH 3/7] Fixed issue with subprocess and stdin. 
--- videohash2/framesextractor.py | 37 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/videohash2/framesextractor.py b/videohash2/framesextractor.py index 706a4ca..768e260 100644 --- a/videohash2/framesextractor.py +++ b/videohash2/framesextractor.py @@ -3,7 +3,7 @@ import math import shlex from shutil import which -from subprocess import PIPE, Popen, check_output +from subprocess import PIPE, DEVNULL, Popen, check_output from typing import Optional, Union from .exceptions import ( @@ -162,12 +162,12 @@ def detect_crop( command = f'"{ffmpeg_path}" -ss {start_time} -i "{video_path}" -vframes {frames} -vf cropdetect -f null -' - process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE) + process = Popen(shlex.split(command), stdin=DEVNULL, stdout=PIPE, stderr=PIPE) output, error = process.communicate() matches = re.findall( - r"crop\=[0-9]{1,4}:[0-9]{1,4}:[0-9]{1,4}:[0-9]{1,4}", + r"crop\=[1-9][0-9]{0,3}:[1-9][0-9]{0,3}:[0-9]{1,4}:[0-9]{1,4}", (output.decode() + error.decode()), ) @@ -178,9 +178,9 @@ def detect_crop( if len(crop_list) > 0: mode = max(crop_list, key=crop_list.count) - crop = " " + crop = [] if mode: - crop = f" -vf {mode} " + crop = ["-vf", mode] return crop @@ -209,22 +209,19 @@ def extract(self) -> None: video_length=video_length ) - command = ( - f'"{ffmpeg_path}"' - + " -i " - + f'"{video_path}"' - + f"{crop}" - + " -s 144x144 " - + " -r " - + str(self.interval) - + " " - + '"' - + output_dir - + "video_frame_%07d.jpeg" - + '"' - ) + command = [ + str(ffmpeg_path), + "-i", + str(video_path), + *crop, + "-s", + "144x144", + "-r", + str(self.interval), + str(output_dir)+"video_frame_%07d.jpeg", + ] - process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE) + process = Popen(command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE) output, error = process.communicate() ffmpeg_output = output.decode() From aec6343ca4ed1f65a424604163267408f3d4e75c Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Fri, 
5 Jul 2024 00:01:59 +0100 Subject: [PATCH 4/7] Updated version number. --- videohash2/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videohash2/__version__.py b/videohash2/__version__.py index 725a2c7..281aecb 100644 --- a/videohash2/__version__.py +++ b/videohash2/__version__.py @@ -6,7 +6,7 @@ ) __url__ = "https://demmenie.github.io/videohash2/" -__version__ = "3.0.3" +__version__ = "3.1.0" __status__ = "production" __author__ = "Akash Mahanty and Chico Demmenie" __author_email__ = "cdemmenie@gmail.com" From f4fb18afca69f4f71f89ccb328c5e260508770a0 Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Fri, 5 Jul 2024 00:30:06 +0100 Subject: [PATCH 5/7] Amended README to include new wiki. --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 68ebc27..e1a14df 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ The video-hash-values for identical or near-duplicate videos are the same or sim ## Installation -To use this software, you must have [FFmpeg](https://ffmpeg.org/) installed. Please read [how to install FFmpeg](https://github.com/akamhy/videohash/wiki/Install-FFmpeg,-but-how%3F) if you don't already know how. +To use this software, you must have [FFmpeg](https://ffmpeg.org/) installed. Please read [how to install FFmpeg](https://github.com/demmenie/videohash2/wiki/Install-FFmpeg,-but-how%3F) if you don't already know how. 
#### Install videohash2 @@ -121,9 +121,9 @@ False >>> ``` -**Extended Usage** : +**Extended Usage** : -**API Reference** : +**API Reference** : -------------------------------------------------------------------------- From c09a966dcf45ec67fce0576724c4d26ad8f91114 Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Fri, 5 Jul 2024 01:01:11 +0100 Subject: [PATCH 6/7] Made do_not_copy Optional --- videohash2/videohash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videohash2/videohash.py b/videohash2/videohash.py index d38904d..22202c2 100644 --- a/videohash2/videohash.py +++ b/videohash2/videohash.py @@ -37,7 +37,7 @@ def __init__( storage_path: Optional[str] = None, download_worst: bool = False, frame_interval: Union[int, float] = 1, - do_not_copy: bool = True, + do_not_copy: Optional[bool] = True, ) -> None: """ :param path: Absolute path of the input video file. From dbee28769b27eb235a69a76c45fcdf0703ad6868 Mon Sep 17 00:00:00 2001 From: CDemmenie Date: Fri, 5 Jul 2024 01:01:56 +0100 Subject: [PATCH 7/7] Fixed return type issue in detect_crop. --- videohash2/framesextractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videohash2/framesextractor.py b/videohash2/framesextractor.py index 768e260..785ddd2 100644 --- a/videohash2/framesextractor.py +++ b/videohash2/framesextractor.py @@ -118,7 +118,7 @@ def detect_crop( frames: int = 3, ffmpeg_path: Optional[str] = None, video_length: float = 2 - ) -> str: + ) -> list: """ Detects the the amount of cropping to remove black bars.