import os

from chardet.universaldetector import UniversalDetector
import chardet

# Detector labels whose byte sequences are already valid GBK: chardet reports
# GB-family text as "GB2312" (a subset of GBK) and pure 7-bit text as "ascii".
_GBK_COMPATIBLE = frozenset({'gbk', 'gb2312', 'ascii'})


def _is_gbk_compatible(encoding):
    """Return True if *encoding* names a charset whose bytes are valid GBK.

    The comparison is case-insensitive; ``None`` (nothing detected) is
    treated as not compatible.
    """
    return encoding is not None and encoding.lower() in _GBK_COMPATIBLE


def get_filelist(path):
    """Return the paths of all CSV files found under *path*, recursively.

    Args:
        path: Root directory to walk.

    Returns:
        A list of file paths (directory joined with file name) whose name
        ends with ``.csv``, compared case-insensitively.
    """
    csv_paths = []
    for home, _dirs, files in os.walk(path):
        for filename in files:
            # Match on the suffix only: a substring test would also pick up
            # names such as "a.csv.bak", which would then be rewritten in place.
            if filename.lower().endswith(".csv"):
                csv_paths.append(os.path.join(home, filename))
    return csv_paths


def read_file(file):
    """Return the raw bytes of *file*."""
    with open(file, 'rb') as f:
        return f.read()


def get_encode_info(file):
    """Detect the text encoding of *file* with chardet's UniversalDetector.

    The file is fed to the detector line by line and reading stops as soon
    as the detector reports that it is done.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector result; this may be ``None``
        when nothing could be detected.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate lazily instead of readlines() so the early exit below
        # actually avoids reading the rest of a large file.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']


def convert_encode2gbk(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Args:
        file: Path of the file to rewrite.
        original_encode: Name of the encoding the file is currently in.
        des_encode: Name of the target encoding (e.g. ``'gbk'``).

    Raises:
        UnicodeEncodeError: If the text contains characters that cannot be
            represented in *des_encode*.

    Note:
        Bytes that cannot be decoded with *original_encode* are dropped
        (``errors='ignore'``), so the conversion can be lossy.
    """
    file_content = read_file(file)
    file_decode = file_content.decode(original_encode, 'ignore')
    # Encode before reopening the file for writing, so a failure here does
    # not leave the original file truncated.
    file_encode = file_decode.encode(des_encode)
    with open(file, 'wb') as f:
        f.write(file_encode)


def read_and_convert(path):
    """Convert every CSV file under *path* to GBK, in place.

    Files whose detected encoding is already GBK-compatible are left
    untouched. Progress and per-file failures are printed; a failure on one
    file does not stop the processing of the others.

    Args:
        path: Root directory to search for CSV files.
    """
    converted = 0
    for filename in get_filelist(path=path):
        try:
            print("filename:", filename)
            encode_info = get_encode_info(filename)
            print("encode_info", encode_info)
            if _is_gbk_compatible(encode_info):
                continue
            if encode_info is None:
                raise ValueError("encoding could not be detected")
            convert_encode2gbk(filename, encode_info, 'gbk')
            # Count only after the file was actually rewritten.
            converted += 1
            print('成功转换 %s 个文件 %s ' % (converted, filename))
        except Exception as exc:
            # Best-effort batch: report and continue, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(filename, '存在问题,请检查!', repr(exc))


def recheck_again(path):
    """Print every CSV file under *path* whose encoding is still not GBK.

    Args:
        path: Root directory to search for CSV files.
    """
    print('---------------------以下文件仍存在问题---------------------')
    for filename in get_filelist(path):
        encode_info_ch = get_encode_info(filename)
        if not _is_gbk_compatible(encode_info_ch):
            print(filename, '的编码方式是:', encode_info_ch)
    print('--------------------------检查结束--------------------------')


# Example entry point (disabled in the original file):
# if __name__ == "__main__":
#     path = r"D:\data"  # directory containing the CSV files
#     read_and_convert(path)
#     recheck_again(path)
#     print('转换结束!')