diff --git a/README.md b/README.md index e8f2bb3..622b2e9 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,76 @@ +# Disk Forensics Toolkit -# Zestaw Narzędzi do Analizy Obrazów Dyskowych +An interactive command-line toolkit for digital-forensics analysis of disk +images: it mounts raw/E01 images, extracts filesystem and partition metadata, +recovers deleted files, searches recovered PDFs for keywords, and exports +password-encrypted, timestamped PDF reports. -## Przegląd -Zestaw narzędzi przeznaczony do analizy obrazów dyskowych w celach kryminalistycznych. Oferuje szereg funkcjonalności, w tym odzyskiwanie plików, ekstrakcję metadanych z obrazów dysków, ekstrakcję tekstu z plików PDF oraz wyszukiwanie określonych słów w tych plikach. Jest szczególnie przydatny w dochodzeniach kryminalistycznych, pomagając odkrywać i analizować dowody z obrazów dyskowych. +## Features -## Funkcje -- **Odzyskiwanie Plików**: Odzyskiwanie plików z obrazów dysków za pomocą narzędzi takich jak 'foremost'. -- **Ekstrakcja Metadanych**: Ekstrakcja i analiza metadanych z różnych formatów obrazów, w tym raw i EWF (Expert Witness Format). -- **Analiza PDF**: Ekstrakcja tekstu z plików PDF i wyszukiwanie w nich określonych słów lub fraz. -- **Raportowanie**: Generowanie i szyfrowanie raportów PDF z analizy, kompletnych ze znakami wodnymi z datą. +- **File recovery** — carve files from a disk image with [`foremost`](https://github.com/korczis/foremost). +- **Metadata extraction** — read E01 header/hash metadata and the DOS partition + table via `pytsk3` / `pyewf`, exported as an encrypted PDF. +- **Evidence info** — walk the image's filesystem and report filenames, sizes + and timestamps. +- **PDF forensic analysis** — extract text from recovered PDFs (PyMuPDF) and + count occurrences of investigator-supplied keywords. +- **Reporting** — every report is watermarked with a timestamp and encrypted + with a password. -## Instalacja -... +## Requirements -## Zależności -- Python 3.x -- Biblioteki: pytsk3, pyewf, PyPDF2 +- **Python 3.10+** +- **System tools:** `foremost` (file carving) and `poppler-utils` (for + `pdf2image`). On Debian/Ubuntu: `sudo apt install foremost poppler-utils`. +- **Python packages:** see `requirements.txt` (`pytsk3`, `libewf-python`, + `PyMuPDF`, `reportlab`, `PyPDF2`, `pdf2image`, `Pillow`, `tabulate`, `pytz`). + +```bash +git clone https://github.com/paulpel/disk-forensics-toolkit.git +cd disk-forensics-toolkit + +uv venv && source .venv/bin/activate # or: python -m venv venv && source venv/bin/activate +uv pip install -r requirements.txt +``` + +> `pytsk3` and `libewf-python` build against system libraries +> (`libtsk`, `libewf`); install those dev packages first if the build fails. + +## Usage + +Place your disk images under `DiskImages/` and run the interactive menu: + +```bash +python main.py +``` + +``` +1. Choose disk image +2. Extract information about documents +3. Extract metadata +4. Recover files +5. Forensic PDF analysis +6. Change password +7. Toggle Base64 encoding +8. Exit +``` + +Each module also runs standalone, e.g.: + +```bash +python recovery_files.py path/to/image.raw +python evidence_metadata.py path/to/image.E01 ewf -p DOS -f report.pdf -pwd +``` + +## Configuration + +| Setting | How | +|---------|-----| +| Report encryption password | menu option 6 (default is a placeholder — change it) | +| Recovery output folder | `RECOVERY_OUTPUT_DIR` env var (defaults to `RecoveredDiskImages/`) | +| Base64-encode report fields | menu option 7 | + +## Output + +Reports are written as encrypted, timestamp-watermarked PDFs in the working +directory; recovered files land under the recovery output folder. diff --git a/evidence_metadata.py b/evidence_metadata.py index 00bbff4..b3355db 100644 --- a/evidence_metadata.py +++ b/evidence_metadata.py @@ -1,4 +1,3 @@ -from __future__ import print_function import argparse import os import pytsk3 @@ -6,7 +5,7 @@ from tabulate import tabulate from reportlab.lib.pagesizes import letter from reportlab.platypus import SimpleDocTemplate, Table -from PyPDF2 import PdfReader, PdfWriter +from PyPDF2 import PdfWriter from datetime import datetime from pdf2image import convert_from_path import tempfile @@ -14,8 +13,6 @@ from reportlab.platypus import TableStyle import base64 from reportlab.pdfgen import canvas -from PyPDF2 import PageObject -from PyPDF2 import PdfWriter from PIL import ImageDraw, ImageFont import pytz @@ -94,6 +91,10 @@ def main(image, img_type, part_type, password, encode_base64=True): invalid file format or inaccessible file system/partition table. """ print("[+] Opening {}".format(image)) + # Default header/hash tables so a raw image (no EWF metadata) doesn't leave + # these unbound when the report is built. + header_table = [["Header Field", "Value"]] + hash_table = [["Acquisition", "Value"]] if img_type == "ewf": try: filenames = pyewf.glob(image) @@ -122,18 +123,12 @@ def main(image, img_type, part_type, password, encode_base64=True): print("[-] Unable to read partition table or file system:\n {}".format(e)) return - if volume: - part_metadata(volume) - elif fs: - # Handle file system analysis if needed - pass - else: - print("No partition or file system detected.") - + table_1 = [["Index", "Type"]] + table_2 = [["Offset Start (Sectors)", "Length (Sectors)"]] if volume: table_1, table_2 = part_metadata(volume) elif fs: - # Handle file system analysis if needed + # File system analysis is not implemented yet. pass else: print("No partition or file system detected.") @@ -258,16 +253,17 @@ def encrypt_pdf(input_pdf, password): "Date: %Y-%m-%d \nTime: %H:%M:%S" ) + try: + watermark_font = ImageFont.truetype("DejaVuSans.ttf", 64) + except OSError: + watermark_font = ImageFont.load_default() + for image in images: # Draw watermark - width, height = image.size - x = width / 2 - y = height / 2 - draw = ImageDraw.Draw(image) text = f"Timestamp: \n {current_datetime}" draw.text( - (10, 10), text=text, fill=(185, 185, 185), fontsize=64 + (10, 10), text=text, fill=(185, 185, 185), font=watermark_font ) # Adjust position and color as needed # Create a temporary PDF file for each image diff --git a/main.py b/main.py index dcc4b1e..1434398 100644 --- a/main.py +++ b/main.py @@ -14,8 +14,8 @@ def __init__(self): self.dir_names = ["2023", "2024"] self.disk_images = self.find_images() self.directories = None - self.choosen_image = None - self.choosen_dir = None + self.chosen_image = None + self.chosen_dir = None self.password = "123" self.encode_base64 = True self.menu_text = self.build_menu_text() @@ -116,7 +116,7 @@ def choose_disk_image(self): if choice.isdigit(): choice = int(choice) if 1 <= choice <= len(self.disk_images): - self.choosen_image = list(self.disk_images.values())[choice - 1] + self.chosen_image = list(self.disk_images.values())[choice - 1] print( f"\nSelected disk image: {list(self.disk_images.keys())[choice - 1]}" ) @@ -163,12 +163,12 @@ def print_menu(self): It also displays the currently chosen disk image (if any) at the top of the menu. If no disk image is selected, it indicates so. """ - choosen_image_text = ( - f"Choosen disk image: {os.path.basename(self.choosen_image)}" - if self.choosen_image + chosen_image_text = ( + f"Choosen disk image: {os.path.basename(self.chosen_image)}" + if self.chosen_image else "No disk image selected." ) - print(f"{choosen_image_text}{self.menu_text}") + print(f"{chosen_image_text}{self.menu_text}") def extract_info(self): """ @@ -183,21 +183,20 @@ def extract_info(self): validation, it proceeds with the extraction process and handles any exceptions that occur. """ print("[+] Extracting information about documents...") - print(self.choosen_image) - if not self.choosen_image or not self.choosen_image.endswith( + if not self.chosen_image or not self.chosen_image.endswith( (".E01", ".raw", ".dd") ): print( "[-] Invalid or no image selected. Choose a valid E01 or raw image first." ) - self.choose_disk_image(change=False) + self.choose_disk_image() return try: open_evidence_main( - self.choosen_image, - "ewf" if self.choosen_image.endswith(".E01") else "raw", + self.chosen_image, + "ewf" if self.chosen_image.endswith(".E01") else "raw", password=self.password, encode_base64=self.encode_base64, ) @@ -218,18 +217,18 @@ def extract_metadata(self): """ print("[+] Extracting metadata from the disk image...") - if not self.choosen_image or not self.choosen_image.endswith( + if not self.chosen_image or not self.chosen_image.endswith( (".E01", ".raw", ".dd") ): print( "[-] Invalid or no image selected. Choose a valid E01 or raw image first." ) - self.choose_disk_image(change=False) + self.choose_disk_image() return try: evidence_metadata_main( - self.choosen_image, - "ewf" if self.choosen_image.endswith(".E01") else "raw", + self.chosen_image, + "ewf" if self.chosen_image.endswith(".E01") else "raw", part_type="DOS", encode_base64=self.encode_base64, password=self.password, @@ -251,17 +250,17 @@ def recover_files(self): """ print("[+] Recovering files from the disk image...") - if not self.choosen_image or not self.choosen_image.endswith( + if not self.chosen_image or not self.chosen_image.endswith( (".E01", ".raw", ".dd") ): print( "[-] Invalid or no image selected. Choose a valid E01 or raw image first." ) - self.choose_disk_image(change=False) + self.choose_disk_image() return try: recovery_files_main( - self.choosen_image, + self.chosen_image, ) except Exception as e: print(f"[-] Error extracting metadata:\n {e}") diff --git a/notatki.txt b/notatki.txt deleted file mode 100644 index 53a10b3..0000000 --- a/notatki.txt +++ /dev/null @@ -1,7 +0,0 @@ -uniemoliwić edycje cyfrowo -zabezpieczyc i opisać dokument -podpiecie foremost i uzyskanie z niego informacje -ekstrakcja z obrazow -ekstrakcja z pdfów [priorytet] - parametr wyszukiwania znaków, będzie podany przez Doktora -dokumentacja technicza -dzialający projekt/test - dokumentacja techniczna \ No newline at end of file diff --git a/open_evidence.py b/open_evidence.py index 1b0de89..d82d2a8 100644 --- a/open_evidence.py +++ b/open_evidence.py @@ -15,7 +15,7 @@ from reportlab.pdfgen import canvas from pdf2image import convert_from_path import tempfile -from PIL import ImageDraw +from PIL import ImageDraw, ImageFont import pytz @@ -188,16 +188,17 @@ def encode_pdf(input_pdf, password): current_datetime = datetime.now(timezone).strftime( "Date: %Y-%m-%d \nTime: %H:%M:%S" ) + try: + watermark_font = ImageFont.truetype("DejaVuSans.ttf", 64) + except OSError: + watermark_font = ImageFont.load_default() + for image in images: # Draw watermark - width, height = image.size - x = width / 2 - y = height / 2 - draw = ImageDraw.Draw(image) text = f"Timestamp: \n {current_datetime}" draw.text( - (10, 10), text=text, fill=(185, 185, 185), fontsize=64 + (10, 10), text=text, fill=(185, 185, 185), font=watermark_font ) # Adjust position and color as needed # Create a temporary PDF file for each image diff --git a/pdf_analysis.py b/pdf_analysis.py index 41e24f3..eeeb141 100644 --- a/pdf_analysis.py +++ b/pdf_analysis.py @@ -95,16 +95,21 @@ def write_analysis_to_file(results, output_file): is saved at the specified output path. """ - pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf")) + try: + pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf")) + font_name = "DejaVuSans" + except Exception: + # DejaVuSans.ttf isn't bundled with the repo; fall back to a built-in font. + font_name = "Helvetica" c = canvas.Canvas(output_file, pagesize=letter) width, height = letter y_position = height - 40 x_position = 40 - c.setFont("DejaVuSans", 12) + c.setFont(font_name,12) c.drawString(x_position, y_position, "PDF Analysis Results") - c.setFont("DejaVuSans", 10) + c.setFont(font_name,10) y_position -= 20 for pdf, counts in results.items(): diff --git a/recovery_files.py b/recovery_files.py index 68d608c..13683d3 100644 --- a/recovery_files.py +++ b/recovery_files.py @@ -23,14 +23,13 @@ def recover_files(evidence_file): timezone = pytz.timezone("Europe/Warsaw") current_datetime = datetime.now(timezone).strftime("%Y_%m_%d_%H_%M_%S") - image_name = evidence_file.split("/")[1] - - output_directory = ( - "~/infa/Disk-Project/RecoveredDiskImages/" - + image_name.split(".")[0] - + "_" - + current_datetime + # Output base is configurable via RECOVERY_OUTPUT_DIR; defaults to a local + # RecoveredDiskImages/ folder (no hardcoded developer path). + output_base = os.path.expanduser( + os.environ.get("RECOVERY_OUTPUT_DIR", "RecoveredDiskImages") ) + image_name = os.path.splitext(os.path.basename(evidence_file))[0] + output_directory = os.path.join(output_base, f"{image_name}_{current_datetime}") if not os.path.exists(output_directory): os.makedirs(output_directory) diff --git a/requirements.txt b/requirements.txt index fafc713..d474019 100644 Binary files a/requirements.txt and b/requirements.txt differ