-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathunicode2koi8r.py
More file actions
130 lines (117 loc) · 3.06 KB
/
Copy pathunicode2koi8r.py
File metadata and controls
130 lines (117 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""Convert Cyrillic from iso-8859-1 Unicode-encoded to KOI8-R-encoded
This script is used during the build process of the Russian translation
of "Dive Into Python" (http://diveintopython.org/).
It takes one argument, which can be either an HTML file or a directory.
If a file, it converts the file in place; if a directory, it converts
every HTML file in the immediate directory (but not recursively).
Safe but pointless to run more than once on the same file or directory.
"""
# Module metadata.  The $Revision$ and $Date$ values look like
# version-control keyword expansions -- presumably maintained by the
# version-control system rather than by hand; confirm before editing.
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.2 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"
import os
import sys
import re
# Maps the decimal HTML character reference of each Russian Cyrillic letter
# to its KOI8-R byte value, written as a one-character '\xNN' string.
#
# The keys must be spelled as ASCII references ('&#1040;'), not as literal
# Cyrillic letters: translateMatch looks up the exact text matched by
# unicodePattern (r'&#[0-9]{4,4};'), so a literal-letter key can never be
# found.  Keeping the table pure ASCII also keeps the file loadable without
# a source-encoding declaration.
unicodeToKOI8R = {
    # U+0401 CYRILLIC CAPITAL LETTER IO
    '&#1025;': '\xb3',
    # U+0410..U+042F: capital letters A..YA
    '&#1040;': '\xe1',
    '&#1041;': '\xe2',
    '&#1042;': '\xf7',
    '&#1043;': '\xe7',
    '&#1044;': '\xe4',
    '&#1045;': '\xe5',
    '&#1046;': '\xf6',
    '&#1047;': '\xfa',
    '&#1048;': '\xe9',
    '&#1049;': '\xea',
    '&#1050;': '\xeb',
    '&#1051;': '\xec',
    '&#1052;': '\xed',
    '&#1053;': '\xee',
    '&#1054;': '\xef',
    '&#1055;': '\xf0',
    '&#1056;': '\xf2',
    '&#1057;': '\xf3',
    '&#1058;': '\xf4',
    '&#1059;': '\xf5',
    '&#1060;': '\xe6',
    '&#1061;': '\xe8',
    '&#1062;': '\xe3',
    '&#1063;': '\xfe',
    '&#1064;': '\xfb',
    '&#1065;': '\xfd',
    '&#1066;': '\xff',
    '&#1067;': '\xf9',
    '&#1068;': '\xf8',
    '&#1069;': '\xfc',
    '&#1070;': '\xe0',
    '&#1071;': '\xf1',
    # U+0430..U+044F: small letters a..ya
    '&#1072;': '\xc1',
    '&#1073;': '\xc2',
    '&#1074;': '\xd7',
    '&#1075;': '\xc7',
    '&#1076;': '\xc4',
    '&#1077;': '\xc5',
    '&#1078;': '\xd6',
    '&#1079;': '\xda',
    '&#1080;': '\xc9',
    '&#1081;': '\xca',
    '&#1082;': '\xcb',
    '&#1083;': '\xcc',
    '&#1084;': '\xcd',
    '&#1085;': '\xce',
    '&#1086;': '\xcf',
    '&#1087;': '\xd0',
    '&#1088;': '\xd2',
    '&#1089;': '\xd3',
    '&#1090;': '\xd4',
    '&#1091;': '\xd5',
    '&#1092;': '\xc6',
    '&#1093;': '\xc8',
    '&#1094;': '\xc3',
    '&#1095;': '\xde',
    '&#1096;': '\xdb',
    '&#1097;': '\xdd',
    '&#1098;': '\xdf',
    '&#1099;': '\xd9',
    '&#1100;': '\xd8',
    '&#1101;': '\xdc',
    '&#1102;': '\xc0',
    '&#1103;': '\xd1',
    # U+0451 CYRILLIC SMALL LETTER IO
    '&#1105;': '\xa3',
}
# Matches a decimal HTML character reference with exactly four digits,
# for example "&#1040;".
unicodePattern = re.compile(r'&#[0-9]{4,4};')

# Matches the charset name ISO-8859-1 written in any letter case.
charsetPattern = re.compile(r'ISO-8859-1', flags=re.I)
def translateMatch(match):
    """Return the replacement text for one character-reference match.

    Used as the replacement callback passed to ``unicodePattern.sub``.

    match -- regular expression match object; its whole matched text
             (group 0) is used as the lookup key.

    Returns the value stored in ``unicodeToKOI8R`` for the matched text,
    or the matched text itself, unchanged, when the table has no entry
    for it.
    """
    reference = match.group(0)
    # dict.get replaces the original has_key() test: has_key() does not
    # exist on Python 3 dictionaries, and get() needs only one lookup.
    return unicodeToKOI8R.get(reference, reference)
def translateBuffer(buffer):
    """Return a converted copy of the text in `buffer`.

    Two substitutions are applied, in this order:

    1. every match of ``unicodePattern`` is replaced by whatever
       ``translateMatch`` returns for it;
    2. every match of ``charsetPattern`` is replaced by 'KOI8-R'.
    """
    converted = unicodePattern.sub(translateMatch, buffer)
    return charsetPattern.sub('KOI8-R', converted)
def translateFile(filename, outfilename=None):
    """Convert one file with translateBuffer and write the result.

    filename    -- path of the file to read.
    outfilename -- path of the file to write; when omitted (or otherwise
                   false) the input file is overwritten in place.

    The whole input is read and translated before the output file is
    opened, so an in-place conversion never reads from a file that has
    already been truncated, and a failure during translation leaves the
    output file untouched.
    """
    if not outfilename:
        outfilename = filename
    # Read in binary mode to match the binary-mode write below (the
    # original read in text mode, which could alter line endings), and use
    # 'with' so the handle is closed even if reading raises.
    with open(filename, 'rb') as fsock:
        data = fsock.read()
    # On Python 3 a binary read yields bytes, while translateBuffer works
    # with text patterns.  Latin-1 maps every byte to the code point of
    # the same value, so the decode/encode round trip is lossless and a
    # '\xNN' replacement is written back as the single byte 0xNN.
    # On Python 2 the data is already a str and both steps are skipped.
    if not isinstance(data, str):
        data = data.decode('latin-1')
    data = translateBuffer(data)
    if not isinstance(data, bytes):
        data = data.encode('latin-1')
    with open(outfilename, 'wb') as fsock:
        fsock.write(data)
def htmlFilter(filename):
    """Return True when the extension of `filename` is exactly '.html'.

    The extension is whatever os.path.splitext reports, and the
    comparison is case-sensitive.
    """
    _root, extension = os.path.splitext(filename)
    return extension == '.html'
def translateDirectory(directoryname, filterFunc=htmlFilter):
    """Convert, in place, every selected entry directly inside a directory.

    directoryname -- path of the directory to scan; only its immediate
                     entries are considered (no recursion).
    filterFunc    -- callable given each entry's path (the directory name
                     joined with the entry name); entries for which it
                     returns a true value are passed to translateFile.
                     Defaults to htmlFilter.
    """
    paths = [os.path.join(directoryname, entry)
             for entry in os.listdir(directoryname)]
    # Select everything first, then convert, as the original did.
    selected = [path for path in paths if filterFunc(path)]
    # An explicit loop replaces map(translateFile, ...): on Python 3 map()
    # is lazy, so the discarded map object never converted any file.
    for path in selected:
        translateFile(path)
if __name__ == "__main__":
    # Exit with a usage message instead of an IndexError traceback when
    # the required path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <html-file-or-directory>" % sys.argv[0])
    name = sys.argv[1]
    # A directory has its immediate HTML files converted; anything else
    # is treated as a single file to convert in place.
    if os.path.isdir(name):
        translateDirectory(name)
    else:
        translateFile(name)