-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessing.py
More file actions
57 lines (45 loc) · 1.69 KB
/
Copy pathpreprocessing.py
File metadata and controls
57 lines (45 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
preprocessing.py
This module handles reading the ciphertext, cleaning it up for decryption,
and restoring the original spacing and punctuation afterwards.
"""
def load_ciphertext(filepath):
    """
    Load the ciphertext stored at ``filepath``.

    The file is opened in text mode and decoded as UTF-8; its entire
    contents are returned as a single string.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
def preprocess_text(raw_text):
    """
    Strip a text down to the uppercase letters a Playfair cipher works on.

    Only ASCII letters (A-Z, a-z) are kept. They are uppercased and 'J' is
    folded into 'I' (Playfair rule), so the original J/I distinction cannot
    be recovered later. Every other character -- spaces, digits, punctuation
    and non-ASCII letters such as 'é' or 'ß' -- is removed from the clean
    text and recorded so it can be put back afterwards.

    Args:
        raw_text: The text to clean.

    Returns:
        A tuple ``(clean_text, non_alpha_positions)`` where
        ``non_alpha_positions`` is a list of ``(position, char)`` pairs in
        the order the characters appeared. ``position`` is the index in
        ``clean_text`` before which ``char`` originally stood; consecutive
        removed characters therefore share the same position.
    """
    letters = []
    non_alpha_positions = []

    for char in raw_text:
        # Restrict to ASCII letters: str.isalpha() also accepts accented and
        # other Unicode letters, which are not in the Playfair alphabet and
        # may even uppercase to several characters ('ß' -> 'SS').
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            letter = char.upper()
            if letter == 'J':
                # Playfair uses a 25-letter alphabet: J is treated as I.
                letter = 'I'
            letters.append(letter)
        else:
            # len(letters) equals the length of the clean text built so far,
            # i.e. the index where this character has to be re-inserted.
            non_alpha_positions.append((len(letters), char))

    # Join once at the end instead of repeated string concatenation.
    return "".join(letters), non_alpha_positions
def restore_formatting(decrypted_text, non_alpha_positions):
    """
    Put back the spaces and punctuation that were removed before decryption.

    Args:
        decrypted_text: The letters-only text to re-format.
        non_alpha_positions: Iterable of ``(position, char)`` pairs, where
            ``position`` is the index in the letters-only text before which
            ``char`` must be inserted. Several entries may share a position
            (e.g. a comma followed by a space); they are re-inserted in the
            order they are given.

    Returns:
        The text with every recorded character re-inserted. Positions past
        the end of ``decrypted_text`` are appended at the end; negative
        positions are treated as 0.
    """
    # sorted() is stable, so entries sharing a position keep their recorded
    # order. (Sorting with reverse=True and inserting one by one would flip
    # such runs: ", " would come back as " ,".)
    ordered = sorted(non_alpha_positions, key=lambda entry: entry[0])

    text_length = len(decrypted_text)
    pieces = []
    cursor = 0  # index of the next not-yet-copied letter of decrypted_text

    # Single left-to-right merge: copy the letters up to each position, then
    # the recorded character. This avoids rebuilding the string per insert.
    for position, char in ordered:
        position = min(max(position, 0), text_length)
        pieces.append(decrypted_text[cursor:position])
        pieces.append(char)
        cursor = position

    pieces.append(decrypted_text[cursor:])
    return "".join(pieces)