Files

95 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from chardet.universaldetector import UniversalDetector
import chardet
def get_filelist(path):
    """
    Collect the paths of all CSV files found under ``path``.

    The directory tree is walked recursively with ``os.walk``. A file is
    selected only when its name ends with ``.csv``; a plain substring test
    would also pick up names such as ``data.csv.bak``, which must not be
    touched by the in-place conversion done elsewhere in this module.

    Args:
        path: Root directory to search.

    Returns:
        A list of file paths (each joined with the directory it was found in).
        The list is empty when nothing matches or ``path`` does not exist.
    """
    csv_paths = []
    for home, _dirs, files in os.walk(path):
        csv_paths.extend(
            os.path.join(home, filename)
            for filename in files
            if filename.endswith(".csv")
        )
    return csv_paths
def read_file(file):
    """
    Return the complete content of ``file`` as raw bytes.

    The file is opened in binary mode, so no decoding takes place here.
    """
    with open(file, mode='rb') as stream:
        raw_bytes = stream.read()
    return raw_bytes
def get_encode_info(file):
    """
    Detect the character encoding of ``file`` with chardet.

    The file is opened in binary mode and fed to a ``UniversalDetector``
    one line at a time; feeding stops as soon as the detector reports that
    it is done.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector's result. chardet documents
        this value as ``None`` when no encoding could be determined.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file object lazily rather than calling f.readlines():
        # that way the early exit below really avoids reading (and holding
        # in memory) the rest of a large file.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    # close() finalises the detection and populates detector.result.
    detector.close()
    return detector.result['encoding']
def convert_encode2gbk(file, original_encode, des_encode):
    """
    Re-encode ``file`` in place from ``original_encode`` to ``des_encode``.

    The content is read as bytes, decoded with ``original_encode`` (bytes
    that cannot be decoded are dropped because of the ``'ignore'`` error
    handler), encoded with ``des_encode`` and written back to the same path.

    Args:
        file: Path of the file to rewrite.
        original_encode: Name of the encoding the file currently uses.
        des_encode: Name of the encoding to write the file in.
    """
    raw_bytes = read_file(file)
    # The whole transcoding happens before the file is reopened for writing,
    # so a decode/encode error leaves the original file untouched.
    converted = raw_bytes.decode(original_encode, 'ignore').encode(des_encode)
    with open(file, 'wb') as out:
        out.write(converted)
def read_and_convert(path):
    """
    Convert every CSV file under ``path`` to GBK, in place.

    For each file returned by ``get_filelist`` the encoding is detected with
    ``get_encode_info``; files whose detected encoding is not already
    GBK-compatible are rewritten through ``convert_encode2gbk``. Progress and
    failures are reported on stdout; a failure on one file does not stop the
    processing of the remaining files.

    Args:
        path: Root directory that is searched recursively for CSV files.
    """
    # chardet's supported-encodings list has no 'gbk' entry: GBK text is
    # reported as 'GB2312', and pure-ASCII text as 'ascii'. Both are byte-wise
    # valid GBK already, so they are skipped. Re-decoding a real GBK file as
    # GB2312 with errors='ignore' would silently drop every character that
    # exists in GBK but not in GB2312.
    gbk_compatible = ('gbk', 'gb2312', 'ascii')
    Filelist = get_filelist(path=path)
    fileNum = 0
    for filename in Filelist:
        try:
            print("filename:", filename)
            encode_info = get_encode_info(filename)
            print("encode_info", encode_info)
            if encode_info is None:
                # Detection failed; report it through the common handler
                # below instead of passing None on as an encoding name.
                raise ValueError('encoding could not be detected')
            if encode_info.lower() not in gbk_compatible:
                convert_encode2gbk(filename, encode_info, 'gbk')
                # Count only after the conversion actually succeeded.
                fileNum += 1
                print('成功转换 %s 个文件 %s '%(fileNum,filename))
        except Exception as exc:
            # Exception (not BaseException) so that Ctrl-C / SystemExit still
            # terminate the run; the cause is printed to help diagnosis.
            print(filename,'存在问题,请检查!', exc)
def recheck_again(path):
    """
    Report the CSV files under ``path`` that are still not GBK-compatible.

    Every file returned by ``get_filelist`` is run through
    ``get_encode_info`` again; files whose detected encoding is missing or
    not GBK-compatible are printed together with that encoding, between a
    header line and a footer line.

    Args:
        path: Root directory that is searched recursively for CSV files.
    """
    # chardet's supported-encodings list has no 'gbk' entry: GBK text is
    # reported as 'GB2312' and pure-ASCII text as 'ascii', both of which are
    # valid GBK. Comparing against the literal 'gbk' alone would flag every
    # file, including the ones that were converted correctly.
    gbk_compatible = ('gbk', 'gb2312', 'ascii')
    print('---------------------以下文件仍存在问题---------------------')
    for filename in get_filelist(path):
        encode_info_ch = get_encode_info(filename)
        if encode_info_ch is None or encode_info_ch.lower() not in gbk_compatible:
            print(filename,'的编码方式是:',encode_info_ch)
    print('--------------------------检查结束--------------------------')
# if __name__ == "__main__":
# """
# 输入文件路径
# """
# path = r"D:\data"
# read_and_convert(path)
# recheck_again(path)
# print('转换结束!')