-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
105 lines (94 loc) · 3.41 KB
/
Copy pathmain.py
File metadata and controls
105 lines (94 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -*- coding: UTF-8 -*-
# ========================================
# ||Run with python 3!!Run with python 3||
# ========================================
# Copyright 2020 unbadfish
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
import chardet
import os
def exist_bom(file_start):
    """Return whether *file_start* is exactly the UTF-8 byte-order mark.

    Args:
        file_start: The leading bytes of a file; the caller in this file
            passes the first three bytes.

    Returns:
        True if ``file_start`` equals the three-byte UTF-8 BOM
        (EF BB BF), otherwise False.
    """
    # The comparison already yields a bool, so no if/else wrapper is needed.
    return file_start == b'\xef\xbb\xbf'
def utf_8_bom_2_utf_8(file_dir):
    """Remove the BOM bytes from a UTF-8 file in place (for UTF-8-SIG).

    The file is rewritten only when its first three bytes are the UTF-8
    BOM; a file without a BOM is left untouched.

    Args:
        file_dir: Path of the file to process.
    """
    # This function is adapted from an original article by the CSDN blogger
    # "赫兹河马", published under the CC 4.0 BY-SA license; when reposting,
    # include the link to the original article and this notice.
    # Original: http://blog.csdn.net/Hongyu_Zhou/article/details/80365815 (2020/06/26)
    # Code should not be licensed under the CC family of licenses, so it is
    # reproduced here under the similar Apache License 2.0.
    #
    # Flow: read bytes --> drop the BOM --> write bytes.
    with open(file_dir, 'rb') as src:
        if not exist_bom(src.read(3)):
            return
        # The first three bytes (the BOM) are already consumed, so the rest
        # of the stream is exactly the BOM-less body.
        f_body = src.read()
    # The read handle is closed before the file is reopened for writing, so
    # no handle leaks and the same path is never open twice at once.
    with open(file_dir, 'wb') as dst:
        dst.write(f_body)
def gb_2312_2_utf_8(file_dir):
    """Re-encode a GB2312 text file as UTF-8 in place.

    Args:
        file_dir: Path of the file to convert.

    Raises:
        UnicodeDecodeError: If the file content cannot be decoded as
            GB18030.
    """
    # Flow: read text (decode) --> encode as UTF-8 bytes --> write bytes.
    #
    # The original author noted that open(..., encoding='gb2312') sometimes
    # failed and therefore opened the file with no encoding at all, which
    # silently relies on the platform's default locale encoding.
    # Decoding explicitly as GB18030 removes that platform dependence; it
    # also accepts GBK-only characters that the strict 'gb2312' codec
    # rejects (presumably the cause of the failures the author saw).
    # Text mode is kept on purpose so newline handling stays as before.
    with open(file_dir, mode='r', encoding='gb18030') as f1:
        content = f1.read()
    with open(file_dir, mode='wb') as f2:
        f2.write(content.encode('utf-8'))
def get_encoding(file_dir):
    """Return the most likely encoding of a single file.

    Relies on the module-level ``import chardet``.

    Args:
        file_dir: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of chardet's detection result, or None
        when the result carries no encoding.
    """
    # Read-only binary mode is enough for detection; the context manager
    # guarantees the handle is closed, so walking many files leaks nothing.
    with open(file_dir, mode='rb') as f1:
        byte_content = f1.read()
    return chardet.detect(byte_content).get('encoding')
# Walk the Z: drive, detect the encoding of every .lrc file and convert it
# to BOM-less UTF-8 where a converter exists.
i = 0  # number of .lrc files found
err = 0  # number of .lrc files that could not be handled
for root, dirs, files in os.walk('Z:\\'):
    for file in files:
        if not file.endswith('.lrc'):
            continue
        each_dir = os.path.join(root, file)
        i += 1
        handled = True
        try:
            # The detection result may be None, and the names compared
            # below are mixed-case; normalise once so the match does not
            # depend on the casing of the reported name.
            encode_way = (get_encoding(each_dir) or '').lower()
            if encode_way in ('utf-8', 'ascii'):
                # Pure-ASCII content is already valid UTF-8: nothing to do.
                pass
            elif encode_way == 'utf-8-sig':
                utf_8_bom_2_utf_8(each_dir)
            elif encode_way == 'gb2312':
                gb_2312_2_utf_8(each_dir)
            else:
                handled = False
        except (OSError, UnicodeError):
            # One unreadable or undecodable file must not abort the walk;
            # report it like any other failure and carry on.
            handled = False
        if not handled:
            # Message: "file <path> failed to decode, please fix manually".
            print('文件' + each_dir + '解码失败,请手动解决问题')
            err += 1
# Summary: total .lrc files, how many succeeded, how many failed.
print('共有 %d 个.lrc文件,有 %d 个转换成功,有 %d 个失败' % (i, i-err, err))
# Reminder that empty files will fail to be read.
print('空文件会读取失败.请注意.')