diff --git a/.gitignore b/.gitignore index f9b2269..7ba7ac0 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ coverage.xml .hypothesis/ .pytest_cache/ + # Translations *.mo *.pot @@ -146,6 +147,7 @@ dmypy.json .idea/**/usage.statistics.xml .idea/**/dictionaries .idea/**/shelf +test.py # AWS User-specific .idea/**/aws.xml diff --git a/README.md b/README.md index 68ebc27..e1a14df 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ The video-hash-values for identical or near-duplicate videos are the same or sim ## Installation -To use this software, you must have [FFmpeg](https://ffmpeg.org/) installed. Please read [how to install FFmpeg](https://github.com/akamhy/videohash/wiki/Install-FFmpeg,-but-how%3F) if you don't already know how. +To use this software, you must have [FFmpeg](https://ffmpeg.org/) installed. Please read [how to install FFmpeg](https://github.com/demmenie/videohash2/wiki/Install-FFmpeg,-but-how%3F) if you don't already know how. #### Install videohash2 @@ -121,9 +121,9 @@ False >>> ``` -**Extended Usage** : +**Extended Usage** : -**API Reference** : +**API Reference** : -------------------------------------------------------------------------- diff --git a/tests/test_framesextractor.py b/tests/test_framesextractor.py index 8203e19..091ab76 100644 --- a/tests/test_framesextractor.py +++ b/tests/test_framesextractor.py @@ -12,22 +12,27 @@ def test_all(): video_path = os.path.join(script_path, os.path.pardir, "assets", "rocket.mkv") output_dir = create_and_return_temporary_directory() + video_length = 52.3 interval = 1 ffmpeg_path = None - FramesExtractor(video_path, output_dir, interval=interval, ffmpeg_path=ffmpeg_path) + FramesExtractor(video_path, output_dir, video_length, + interval=interval, ffmpeg_path=ffmpeg_path) with pytest.raises(FileNotFoundError): video_path = os.path.join(script_path, "thisvideodoesnotexist.mp4") output_dir = create_and_return_temporary_directory() - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=None) + FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=None) with pytest.raises(FFmpegNotFound): video_path = os.path.join(script_path, os.path.pardir, "assets", "rocket.mkv") output_dir = create_and_return_temporary_directory() ffmpeg_path = os.path.join(output_dir, "ffmpeg") - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=ffmpeg_path) + FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=ffmpeg_path) with pytest.raises(FramesExtractorOutPutDirDoesNotExist): video_path = os.path.join(script_path, "../assets/rocket.mkv") output_dir = os.path.join(script_path, "thisdirdoesnotexist/") - FramesExtractor(video_path, output_dir, interval=1, ffmpeg_path=None) + FramesExtractor(video_path, output_dir, video_length, interval=1, + ffmpeg_path=None) diff --git a/videohash2/__version__.py b/videohash2/__version__.py index 725a2c7..281aecb 100644 --- a/videohash2/__version__.py +++ b/videohash2/__version__.py @@ -6,7 +6,7 @@ ) __url__ = "https://demmenie.github.io/videohash2/" -__version__ = "3.0.3" +__version__ = "3.1.0" __status__ = "production" __author__ = "Akash Mahanty and Chico Demmenie" __author_email__ = "cdemmenie@gmail.com" diff --git a/videohash2/framesextractor.py b/videohash2/framesextractor.py index e8e29c2..785ddd2 100644 --- a/videohash2/framesextractor.py +++ b/videohash2/framesextractor.py @@ -1,8 +1,9 @@ import os import re +import math import shlex from shutil import which -from subprocess import PIPE, Popen, check_output +from subprocess import PIPE, DEVNULL, Popen, check_output from typing import Optional, Union from .exceptions import ( @@ -26,6 +27,7 @@ def __init__( self, video_path: str, output_dir: str, + video_length: float, interval: Union[int, float] = 1, ffmpeg_path: Optional[str] = None, ) -> None: @@ -53,6 +55,7 @@ def __init__( """ self.video_path = video_path self.output_dir = output_dir + self.video_length = video_length self.interval = interval self.ffmpeg_path = "" if ffmpeg_path: @@ -114,7 +117,8 @@ def detect_crop( video_path: Optional[str] = None, frames: int = 3, ffmpeg_path: Optional[str] = None, - ) -> str: + video_length: float = 2 + ) -> list: """ Detects the the amount of cropping to remove black bars. @@ -144,19 +148,26 @@ def detect_crop( 7200, 14400, ] + crop_list = [] for start_time in time_start_list: + # Stopping the loop if we go beyond the end length of the video. + # We round the video length up to make sure we do get the whole + # video. + if start_time > math.ceil(video_length): + break + command = f'"{ffmpeg_path}" -ss {start_time} -i "{video_path}" -vframes {frames} -vf cropdetect -f null -' - process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE) + process = Popen(shlex.split(command), stdin=DEVNULL, stdout=PIPE, stderr=PIPE) output, error = process.communicate() matches = re.findall( - r"crop\=[0-9]{1,4}:[0-9]{1,4}:[0-9]{1,4}:[0-9]{1,4}", + r"crop\=[1-9][0-9]{0,3}:[1-9][0-9]{0,3}:[0-9]{1,4}:[0-9]{1,4}", (output.decode() + error.decode()), ) @@ -167,9 +178,9 @@ def detect_crop( if len(crop_list) > 0: mode = max(crop_list, key=crop_list.count) - crop = " " + crop = [] if mode: - crop = f" -vf {mode} " + crop = ["-vf", mode] return crop @@ -185,6 +196,7 @@ def extract(self) -> None: ffmpeg_path = self.ffmpeg_path video_path = self.video_path + video_length = self.video_length output_dir = self.output_dir if os.name == "posix": @@ -193,25 +205,23 @@ def extract(self) -> None: output_dir = shlex.quote(self.output_dir) crop = FramesExtractor.detect_crop( - video_path=video_path, frames=3, ffmpeg_path=ffmpeg_path + video_path=video_path, frames=3, ffmpeg_path=ffmpeg_path, + video_length=video_length ) - command = ( - f'"{ffmpeg_path}"' - + " -i " - + f'"{video_path}"' - + f"{crop}" - + " -s 144x144 " - + " -r " - + str(self.interval) - + " " - + '"' - + output_dir - + "video_frame_%07d.jpeg" - + '"' - ) + command = [ + str(ffmpeg_path), + "-i", + str(video_path), + *crop, + "-s", + "144x144", + "-r", + str(self.interval), + str(output_dir)+"video_frame_%07d.jpeg", + ] - process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE) + process = Popen(command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE) output, error = process.communicate() ffmpeg_output = output.decode() diff --git a/videohash2/utils.py b/videohash2/utils.py index af060d2..e987de4 100644 --- a/videohash2/utils.py +++ b/videohash2/utils.py @@ -21,21 +21,18 @@ def does_path_exists(path: str) -> bool: If a directory is supplied then check if it exists. If a file is supplied then check if it exists. - Directory ends with "/" on posix or "\" in windows and files do not. - If directory/file exists returns True else returns False :return: True if dir or file exists else False. :rtype: bool """ - if path.endswith("/") or path.endswith("\\"): - # it's directory - return os.path.isdir(path) + if os.path.isdir(path) or os.path.isfile(path): + return os.path.exists(path) else: # it's file - return os.path.isfile(path) + return False def create_and_return_temporary_directory() -> str: diff --git a/videohash2/videohash.py b/videohash2/videohash.py index 2f3f239..22202c2 100644 --- a/videohash2/videohash.py +++ b/videohash2/videohash.py @@ -37,6 +37,7 @@ def __init__( storage_path: Optional[str] = None, download_worst: bool = False, frame_interval: Union[int, float] = 1, + do_not_copy: Optional[bool] = True, ) -> None: """ :param path: Absolute path of the input video file. @@ -74,6 +75,7 @@ def __init__( self._storage_path = self.storage_path self.download_worst = download_worst + self.do_not_copy = do_not_copy self.frame_interval = frame_interval self.task_uid = VideoHash._get_task_uid() @@ -82,7 +84,14 @@ def __init__( self._copy_video_to_video_dir() - FramesExtractor(self.video_path, self.frames_dir, interval=self.frame_interval) + self.video_duration = video_duration(self.video_path) + + FramesExtractor( + self.video_path, + self.frames_dir, + video_length=self.video_duration, + interval=self.frame_interval, + ) self.collage_path = os.path.join(self.collage_dir, "collage.jpg") @@ -104,7 +113,6 @@ def __init__( self.image = Image.open(self.collage_path) self.bits_in_hash = 64 self.similar_percentage = 15 - self.video_duration = video_duration(self.video_path) self._calc_hash() @@ -289,7 +297,10 @@ def _copy_video_to_video_dir(self) -> None: self.video_path = os.path.join(self.video_dir, (f"video.{extension}")) - shutil.copyfile(self.path, self.video_path) + if self.do_not_copy: + os.symlink(self.path, self.video_path) + else: + shutil.copyfile(self.path, self.video_path) if self.url: @@ -309,7 +320,10 @@ def _copy_video_to_video_dir(self) -> None: self.video_path = f"{self.video_dir}video.{extension}" - shutil.copyfile(downloaded_file, self.video_path) + if self.do_not_copy: + os.symlink(downloaded_file, self.video_path) + else: + shutil.copyfile(downloaded_file, self.video_path) def _create_required_dirs_and_check_for_errors(self) -> None: """