IndicatorOfCanary/docx_patch.py at main · HackingLZ/IndicatorOfCanary · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/python3
"""
Patch/replace canary URLs in DOCX files
Allows replacing known canary URLs with custom URLs
"""

import argparse
import zipfile
import os
import re
import tempfile
import shutil
from colorama import Fore, Style, init
import canary_config as config
import canary_utils as utils

# Call colorama's init() once at import time, before any Fore/Style output below.
# NOTE(review): presumably this enables ANSI colour handling on consoles that
# lack it (e.g. Windows) — confirm against the colorama documentation.
init()

def modify_all_files_in_zip(zip_path, search_pattern, replace_with, verbose=False):
    """
    Replace every match of a pattern inside the parts of a DOCX (zip) file.

    The archive is extracted to a temporary directory, each non-image part is
    searched with ``re.sub`` semantics, changed parts are written back, and a
    new archive named ``<input basename>_patched.docx`` is created in the
    current working directory. The temporary directory is always removed.

    Args:
        zip_path: Path to input DOCX file
        search_pattern: Regular expression (string or compiled pattern) to
            search for
        replace_with: Replacement, interpreted as an ``re.sub`` replacement
            template (backslash escapes and group references are processed)
        verbose: Print detailed progress

    Returns:
        Tuple of (modified_zip_path, modified_files) where modified_files is
        the list of archive-relative paths whose content changed

    Raises:
        re.error: If search_pattern or the replace_with template is invalid.
            These are global problems, so they are reported once instead of
            being swallowed per file.
    """
    # Compile once, outside the per-file loop; an invalid pattern fails fast
    # here, before any temporary files are created.
    pattern = re.compile(search_pattern)

    # Use proper temp directory
    temp_dir = tempfile.mkdtemp(prefix='docx_patch_')

    try:
        # Extract the DOCX
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        modified_files = []

        # Process all files
        for root, _dirs, files in os.walk(temp_dir):
            for file in files:
                file_path = os.path.join(root, file)
                relative_path = os.path.relpath(file_path, temp_dir)

                # Skip common image formats (extension check is case-insensitive)
                if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                    continue

                try:
                    # Read raw bytes and decode with 'surrogateescape' so that
                    # bytes which are not valid UTF-8 (and CR/LF line endings)
                    # survive the decode -> substitute -> encode round-trip
                    # unchanged instead of being silently dropped.
                    with open(file_path, 'rb') as f:
                        contents = f.read().decode('utf-8', errors='surrogateescape')

                    # Perform replacement
                    new_contents = pattern.sub(replace_with, contents)

                    if new_contents != contents:
                        # Encode before opening for write so a failed encode
                        # cannot leave a truncated file behind.
                        patched = new_contents.encode('utf-8', errors='surrogateescape')
                        with open(file_path, 'wb') as f:
                            f.write(patched)
                        modified_files.append(relative_path)

                        if verbose:
                            print(f"{Fore.GREEN}✓{Style.RESET_ALL} Modified: {relative_path}")
                except (OSError, UnicodeError) as e:
                    # Best effort: a part that cannot be read/written is
                    # skipped; regex errors are deliberately NOT caught here.
                    if verbose:
                        print(f"{Fore.YELLOW}⚠{Style.RESET_ALL} Skipped {relative_path}: {e}")

        # Create output filename
        base_name = os.path.splitext(os.path.basename(zip_path))[0]
        modified_zip_path = f"{base_name}_patched.docx"

        # Create new DOCX with modified content
        with zipfile.ZipFile(modified_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arc_path = os.path.relpath(file_path, temp_dir)
                    zipf.write(file_path, arc_path)

        return modified_zip_path, modified_files

    finally:
        # Clean up temp directory
        shutil.rmtree(temp_dir, ignore_errors=True)

def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Options are registered from a table, in the order they appear in the
    help output: input, search, replace/host, output, verbose, regex.
    """
    option_table = (
        (('-i', '--input'),
         {'required': True, 'help': 'Input .docx file path'}),
        (('-s', '--search'),
         {'help': 'URL/pattern to search for (if not specified, replaces all known canary domains)'}),
        # '--host' is an alias; the value is stored under 'replace'.
        (('-r', '--replace', '--host'),
         {'required': True, 'help': 'Replacement URL/host'}),
        (('-o', '--output'),
         {'help': 'Output file path (default: input_patched.docx)'}),
        (('-v', '--verbose'),
         {'action': 'store_true', 'help': 'Verbose output'}),
        (('--regex',),
         {'action': 'store_true', 'help': 'Treat search pattern as regex'}),
    )

    cli = argparse.ArgumentParser(
        description='Replace canary URLs in .docx files',
        epilog='Example: %(prog)s -i document.docx -s "canarytokens.com" -r "safe.example.com"',
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()

def build_search_pattern(search_str, use_regex):
    """
    Produce the regular-expression source text used for the search.

    Args:
        search_str: User-supplied search text; a falsy value (None or "")
            selects the built-in pattern covering config.ALERT_DOMAINS
        use_regex: When true, search_str is returned untouched; otherwise
            it is escaped so it matches literally

    Returns:
        Regex pattern as a string (not a compiled pattern object)
    """
    if not search_str:
        # No explicit search: match http(s) URLs on any known canary domain,
        # with an optional subdomain prefix and optional path segments.
        alternation = '|'.join(map(re.escape, config.ALERT_DOMAINS))
        return f'https?://(?:[\\w.-]+\\.)?(?:{alternation})(?:/[\\w.-]*)*'

    return search_str if use_regex else re.escape(search_str)

def main():
    """
    Command-line entry point: patch the input DOCX and report the result.

    Parses the arguments, validates the input path, builds the search
    pattern, runs modify_all_files_in_zip, optionally moves the result to
    the requested output path, and prints a summary.

    Returns:
        0 on success, 1 if validation or patching failed
    """
    args = parse_args()

    # Validate input file
    try:
        utils.validate_file_exists(args.input)
    except (FileNotFoundError, ValueError, PermissionError) as e:
        print(f"{Fore.RED}Error: {e}{Style.RESET_ALL}")
        return 1

    # Build search pattern
    search_pattern = build_search_pattern(args.search, args.regex)

    # re.sub processes backslash escapes in a string replacement, so a literal
    # replacement containing backslashes (e.g. a UNC path like \\host\share)
    # would raise or be mangled. Outside --regex mode the replacement is meant
    # literally, so escape its backslashes before handing it to the patcher.
    replacement = args.replace
    if not args.regex:
        replacement = replacement.replace('\\', '\\\\')

    if args.verbose:
        print(f"Input file: {args.input}")
        if args.search:
            print(f"Search pattern: {args.search}")
        else:
            print("Replacing all known canary domains")
        print(f"Replacement: {args.replace}")
        print("-" * 50)

    try:
        # Perform the patching
        modified_zip_path, modified_files = modify_all_files_in_zip(
            args.input,
            search_pattern,
            replacement,
            args.verbose
        )

        # Rename to custom output if specified
        if args.output:
            shutil.move(modified_zip_path, args.output)
            modified_zip_path = args.output

        print(f"\n{Fore.GREEN}✓{Style.RESET_ALL} Modified document saved to: {modified_zip_path}")

        if modified_files:
            print(f"\n{Fore.CYAN}Modified {len(modified_files)} file(s) within the .docx:{Style.RESET_ALL}")
            if not args.verbose:  # Don't repeat if already shown
                for name in modified_files[:10]:  # Show first 10
                    print(f"  - {name}")
                if len(modified_files) > 10:
                    print(f"  ... and {len(modified_files) - 10} more")
        else:
            print(f"{Fore.YELLOW}⚠{Style.RESET_ALL} No files were modified (pattern not found)")

        return 0

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"{Fore.RED}Error: {e}{Style.RESET_ALL}")
        return 1

if __name__ == "__main__":
    # The bare exit() helper is injected by the site module and is missing
    # under `python -S` or in frozen builds; SystemExit is always available.
    raise SystemExit(main())