From 760012fa945d54b8ba40226d69adc1ae501e1c6b Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:39:32 -0400 Subject: [PATCH 1/7] Update pre-commit hook to clear metadata from both Office and image files --- .pre-commit-hooks.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 6522f8d..39e1d90 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -1,7 +1,7 @@ - id: clear-metadata - name: dmeta — clear metadata from Office files + name: dmeta — clear metadata from Office and image files description: | - Recursively strip metadata from Microsoft Office files + Recursively strip metadata from Microsoft Office and image files in-place before each commit. entry: dmeta --clear-all --inplace language: python From 9d0584535d27d36cc4144bf22e9cd15ff7f9c317 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:40:12 -0400 Subject: [PATCH 2/7] Update argument help descriptions to clarify supported file types for metadata clearing commands --- dmeta/__main__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dmeta/__main__.py b/dmeta/__main__.py index 83af790..143b6ae 100644 --- a/dmeta/__main__.py +++ b/dmeta/__main__.py @@ -15,15 +15,15 @@ def main(): parser.add_argument( '--clear', nargs=1, - metavar=".docx file", + metavar="file", type=str, - help="the `clear` command clears all metadata in the given `.docx` file.", + help="the `clear` command clears all metadata in the given file (.docx, .pptx, .xlsx, .png, .jpg, .jpeg, .gif).", ) parser.add_argument( '--clear-all', action="store_true", default=False, - help='the `clear-all` command clears all metadata in any `.docx` file in the current directory.', + help='the `clear-all` command clears all metadata in any supported file in the current directory.', ) parser.add_argument( '--update', From 9acfde7a281d942e6448d88afb8adac331cbcad7 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 
11 May 2026 20:41:26 -0400 Subject: [PATCH 3/7] Refactor metadata clearing functions to support additional file formats - Updated functions to use `get_file_format` instead of `get_microsoft_format` for better compatibility with various file types. - Introduced a new `clear_file` function to handle metadata clearing based on file format. - Adjusted `clear_all` and `update_all` functions to utilize the new format handling logic. - Enhanced documentation to reflect support for a broader range of file formats. --- dmeta/functions.py | 72 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/dmeta/functions.py b/dmeta/functions.py index bd2927a..c7e008a 100644 --- a/dmeta/functions.py +++ b/dmeta/functions.py @@ -7,9 +7,10 @@ from art import tprint import defusedxml.lxml as lxml from .errors import DMetaBaseError -from .util import get_microsoft_format, extract, read_json +from .util import get_file_format, extract, read_json from .params import CORE_XML_MAP, APP_XML_MAP, OVERVIEW, DMETA_VERSION, \ - UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR, SUPPORTED_MICROSOFT_FORMATS, \ + UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR, \ + SUPPORTED_MICROSOFT_FORMATS, SUPPORTED_FORMATS, \ JPEG_MARKER_PREFIX, JPEG_SOI, JPEG_EOI, JPEG_SOS, JPEG_COM, \ JPEG_APP_FIRST, JPEG_APP_LAST, JPEG_STANDALONE_MARKERS, \ GIF_TRAILER, GIF_EXTENSION_INTRODUCER, GIF_IMAGE_DESCRIPTOR, \ @@ -56,8 +57,8 @@ def clear(microsoft_file_name, in_place=False, verbose=False): :type verbose: bool :return: None """ - microsoft_format = get_microsoft_format(microsoft_file_name) - if microsoft_format is None: + microsoft_format = get_file_format(microsoft_file_name) + if microsoft_format is None or microsoft_format not in SUPPORTED_MICROSOFT_FORMATS: return unzipped_dir, source_file = extract(microsoft_file_name) doc_props_dir = os.path.join(unzipped_dir, "docProps") @@ -107,7 +108,7 @@ def is_metadata_cleared(xml_path, is_core=True): def clear_all(in_place=False, 
verbose=False): """ - Clear all the editable metadata in any Microsoft file in the current directory and its subdirectories. + Clear all the editable metadata in any supported file in the current directory and its subdirectories. :param in_place: the `in_place` flag applies the changes directly to the original file :type in_place: bool @@ -117,20 +118,20 @@ def clear_all(in_place=False, verbose=False): """ path = os.getcwd() counter = { - format: 0 for format in SUPPORTED_MICROSOFT_FORMATS + fmt: 0 for fmt in SUPPORTED_FORMATS } for root, _, files in os.walk(path): for file in files: - format = get_microsoft_format(file) - if format is None: + fmt = get_file_format(file) + if fmt is None: continue - clear(os.path.join(root, file), in_place, verbose) - counter[format] += 1 + clear_file(os.path.join(root, file), in_place, verbose) + counter[fmt] += 1 if verbose: - for format in counter.keys(): - print("Metadata of {} files with the format of {} has been cleared.".format(counter[format], format)) + for fmt in counter.keys(): + print("Metadata of {} files with the format of {} has been cleared.".format(counter[fmt], fmt)) def update(config_file_name, microsoft_file_name, in_place=False, verbose=False): @@ -158,8 +159,8 @@ def update(config_file_name, microsoft_file_name, in_place=False, verbose=False) print("There isn't any chosen personal field to remove.") return - microsoft_format = get_microsoft_format(microsoft_file_name) - if microsoft_format is None: + microsoft_format = get_file_format(microsoft_file_name) + if microsoft_format is None or microsoft_format not in SUPPORTED_MICROSOFT_FORMATS: return unzipped_dir, source_file = extract(microsoft_file_name) @@ -230,11 +231,11 @@ def update_all(config_file_name, in_place=False, verbose=False): for root, _, files in os.walk(path): for file in files: - format = get_microsoft_format(file) - if format is None: - return + fmt = get_file_format(file) + if fmt is None or fmt not in SUPPORTED_MICROSOFT_FORMATS: + continue 
update(config_file_name, os.path.join(root, file), in_place, verbose) - counter[format] += 1 + counter[fmt] += 1 if verbose: for format in counter.keys(): @@ -430,6 +431,39 @@ def skip_sub_blocks(start): return output_path +CLEAR_HANDLERS = { + "docx": clear, + "pptx": clear, + "xlsx": clear, + "png": clear_png_metadata, + "jpg": clear_jpeg_metadata, + "jpeg": clear_jpeg_metadata, + "gif": clear_gif_metadata, +} + + +def clear_file(file_name, in_place=False, verbose=False): + """ + Clear all metadata from the given file based on its format. + + :param file_name: path to the file + :type file_name: str + :param in_place: applies changes directly to the original file + :type in_place: bool + :param verbose: enables detailed output + :type verbose: bool + :return: path to the cleared file, or None if format is unsupported + :rtype: str or None + """ + fmt = get_file_format(file_name) + if fmt is None: + return None + handler = CLEAR_HANDLERS.get(fmt) + if handler is None: + return None + return handler(file_name, in_place, verbose) + + def extract_metadata(microsoft_file_name): """ Extract all the editable metadata from the given Microsoft file. @@ -483,7 +517,7 @@ def run_dmeta(args): """ verbose = args.verbose if args.clear: - clear(args.clear[0], args.inplace, verbose) + clear_file(args.clear[0], args.inplace, verbose) elif args.clear_all: clear_all(args.inplace, verbose) elif args.update: From 0760b2012a3bd7b332a73b3961da64bed9f123c4 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:41:38 -0400 Subject: [PATCH 4/7] Enhance documentation and support for additional image file formats - Updated the library overview to include support for image files (.png, .jpg, .jpeg, .gif). - Introduced a new list for supported image formats and combined it with existing Microsoft formats for a comprehensive format list. 
--- dmeta/params.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dmeta/params.py b/dmeta/params.py index c3a1ba0..c305029 100644 --- a/dmeta/params.py +++ b/dmeta/params.py @@ -2,7 +2,7 @@ """DMeta parameters and constants.""" DMETA_VERSION = "0.4" OVERVIEW = """ -A Python library for removing personal metadata in Microsoft files(.docx, .pptx, .xlsx). +A Python library for removing personal metadata in Microsoft files(.docx, .pptx, .xlsx) and image files(.png, .jpg, .jpeg, .gif). """ CORE_XML_MAP = { @@ -36,6 +36,13 @@ "pptx", "xlsx" ] +SUPPORTED_IMAGE_FORMATS = [ + "png", + "jpg", + "jpeg", + "gif" +] +SUPPORTED_FORMATS = SUPPORTED_MICROSOFT_FORMATS + SUPPORTED_IMAGE_FORMATS # JPEG marker codes per ITU-T T.81. JPEG_MARKER_PREFIX = 0xFF JPEG_SOI = 0xD8 # Start Of Image From 2cc1413feaa439bd81f82ed2a9f5763fcad4f3db Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:41:48 -0400 Subject: [PATCH 5/7] Refactor file format handling in `get_file_format` function - Renamed `get_microsoft_format` to `get_file_format` for broader applicability. - Updated the function to check against a unified list of supported formats instead of just Microsoft formats. - Enhanced documentation to clarify parameter types and return values. --- dmeta/util.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dmeta/util.py b/dmeta/util.py index 26135dc..abf9f0b 100644 --- a/dmeta/util.py +++ b/dmeta/util.py @@ -4,27 +4,28 @@ import json from shutil import rmtree from zipfile import ZipFile -from .params import SUPPORTED_MICROSOFT_FORMATS, INVALID_CONFIG_FILE_NAME_ERROR, CONFIG_FILE_DOES_NOT_EXIST_ERROR +from .params import SUPPORTED_FORMATS, INVALID_CONFIG_FILE_NAME_ERROR, CONFIG_FILE_DOES_NOT_EXIST_ERROR from .errors import DMetaBaseError -def get_microsoft_format(file_name): +def get_file_format(file_name): """ - Extract format from the end of the given microsoft file name. 
+ Extract format from the end of the given file name. - :param file_name: name of the microsoft file name + :param file_name: name of the file :type file_name: str - :return: str + :return: format string if supported, None otherwise + :rtype: str or None """ if not isinstance(file_name, str): return None last_dot_index = file_name.rfind('.') - if (last_dot_index == -1): + if last_dot_index == -1: return None - format = file_name[last_dot_index + 1:] - if format not in SUPPORTED_MICROSOFT_FORMATS: + fmt = file_name[last_dot_index + 1:].lower() + if fmt not in SUPPORTED_FORMATS: return None - return format + return fmt def extract(file_name): From 214817ea6ecbc8e0e7a1c8f2a73b5463f971e669 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:48:13 -0400 Subject: [PATCH 6/7] `README.md` updated --- README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fa13daa..a0ed66a 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ ## Overview

-DMeta is an open source Python package that removes metadata of Microsoft Office files. +DMeta is an open source Python package that removes metadata of Microsoft Office files and image files.

@@ -81,7 +81,15 @@ from dmeta.functions import clear DOCX_FILE_PATH = os.path.join(os.getcwd(), "sample.docx") clear(DOCX_FILE_PATH, in_place=True) ``` -#### Clear metadata for all existing microsoft files (.docx|.pptx|.xlsx) in the current directory +#### Clear metadata for any supported file in place +```python +import os +from dmeta.functions import clear_file + +FILE_PATH = os.path.join(os.getcwd(), "photo.png") +clear_file(FILE_PATH, in_place=True) +``` +#### Clear metadata for all existing supported files (.docx|.pptx|.xlsx|.png|.jpg|.jpeg|.gif) in the current directory ```python from dmeta.functions import clear_all clear_all() @@ -114,7 +122,11 @@ update_all(CONFIG_FILE_PATH) ```console dmeta --clear "./test_a.docx" --inplace ``` -#### Clear metadata for all existing microsoft files (.docx|.pptx|.xlsx) in the current directory +#### Clear metadata for a .png file in place +```console +dmeta --clear "./photo.png" --inplace +``` +#### Clear metadata for all existing supported files (.docx|.pptx|.xlsx|.png|.jpg|.jpeg|.gif) in the current directory ```console dmeta --clear-all ``` @@ -162,7 +174,7 @@ repos: pre-commit install # or pre_commit install (in windows) ``` -Now, every time you `git commit`, Dmeta will automatically clear metadata from any Microsoft files in-place. +Now, every time you `git commit`, Dmeta will automatically clear metadata from any supported files in-place. 
#### ⚠️ Important: Clean Before You Commit @@ -186,6 +198,9 @@ If you run `git add` on Office files that still contain embedded metadata, the p | Microsoft Word (.docx) | ✅ | | Microsoft PowerPoint (.pptx) | ✅ | | Microsoft Excel (.xlsx) | ✅ | +| PNG (.png) | ✅ | +| JPEG (.jpg, .jpeg) | ✅ | +| GIF (.gif) | ✅ | ## Issues & bug reports From a452aa2c89003596b97e43c6a50c4a480d85c792 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Mon, 11 May 2026 20:49:02 -0400 Subject: [PATCH 7/7] Update CHANGELOG.md to reflect recent changes in functions and parameters - Added new functions: `get_file_format` and `clear_file`. - Updated existing functions: `clear`, `update`, `clear_all`, `update_all`, and `run_dmeta`. - Removed `get_microsoft_format` function. - Enhanced CLI help text and updated pre-commit hooks. - Documented changes in the changelog. --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1f0a9b..7607658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,12 +12,23 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - `clear_jpeg_metadata` function in `functions.py` - `clear_png_metadata` function in `functions.py` - `extract_metadata` function in `functions.py` +- `SUPPORTED_IMAGE_FORMATS` and `SUPPORTED_FORMATS` in `params.py` +- `get_file_format` function in `util.py` +- `CLEAR_HANDLERS` dict in `functions.py` +- `clear_file` function in `functions.py` ### Changed - `test.yml` - `clear` function in `functions.py` - `update` function in `functions.py` +- `clear_all` function in `functions.py` +- `update_all` function in `functions.py` +- `run_dmeta` function in `functions.py` +- CLI help text in `__main__.py` +- `.pre-commit-hooks.yaml` updated - Test system modified - `README.md` updated +### Removed +- `get_microsoft_format` function in `util.py` ## [0.4] - 2025-06-16 ### Added - `Acknowledgments` in `README.md`