python3 gb2312转utf8_GB2312转UTF-8

来源

代码

# Encoding conversion tool: converts every ".c .h .cpp .hpp .bat .java" file under the given path to UTF-8.

#!/usr/bin/env python3

# -*- coding:utf-8 -*-

import os

import sys

import codecs

import chardet

# Error messages collected by convert(); main() prints them as the final report.
gErrArray = []

def convert(fileName, filePath, out_enc="utf-8"):
    """Re-encode the file at *filePath* in place to *out_enc*.

    The file's current encoding is guessed with ``chardet.detect``. The file
    is left untouched when the guess already names the target encoding.

    Args:
        fileName: Name shown in the progress table (display only).
        filePath: Path of the file to read and overwrite.
        out_enc: Target encoding name; defaults to ``"utf-8"``.

    Returns:
        None. Failures are not raised: an undetectable encoding and any
        exception during reading, decoding or writing are appended to the
        module-level ``gErrArray`` list instead.
    """
    try:
        # Context managers guarantee the handle is closed even on error.
        with open(filePath, 'rb') as src:
            content = src.read()

        source_encoding = chardet.detect(content)['encoding']
        print("{0:50}{1}".format(fileName, source_encoding))

        if source_encoding is None:
            gErrArray.append("can not recgonize file encoding %s" % filePath)
            return

        # Compare canonical codec names so that spelling/case variants such as
        # "UTF-8" and "utf-8" are recognised as the same encoding. An unknown
        # name raises LookupError, which is recorded by the handler below.
        if codecs.lookup(source_encoding).name == codecs.lookup(out_enc).name:
            return

        # Finish the conversion in memory before reopening the file for
        # writing, so a decode/encode failure cannot truncate the original.
        converted = content.decode(source_encoding).encode(out_enc)
        with open(filePath, 'wb') as dst:
            dst.write(converted)
    except Exception as err:
        # Best-effort batch tool: record the failure and carry on.
        gErrArray.append("%s:%s" % (filePath, err))

def explore(dir):
    """Walk *dir* recursively and pass every matching file to convert().

    A table header is printed first; convert() prints one row per file.
    Only files whose extension is exactly one of .h, .c, .cpp, .hpp, .bat
    or .java are processed.

    Args:
        dir: Root folder to scan.
    """
    print("\r\n===============================================================")
    print("{0:50}{1}".format('fileName', 'fileEncoding'))
    print("===============================================================")

    wanted = ('.h', '.c', '.cpp', '.hpp', '.bat', '.java')
    for folder, _subdirs, names in os.walk(dir):
        for name in names:
            extension = os.path.splitext(name)[1]
            if extension not in wanted:
                continue
            convert(name, os.path.join(folder, name))

def main():
    """Run one interactive conversion pass and print its error report.

    Prompts for a folder path, converts the files under it via explore(),
    then prints every message collected in ``gErrArray`` followed by the
    total count and, if there were errors, a manual-fix hint.
    """
    # main() is invoked repeatedly from the script's loop; start every pass
    # with an empty list so errors from an earlier folder are not reported
    # (and counted) again.
    gErrArray.clear()

    filePath = input("请输入要转换编码的文件夹路径: \n")
    explore(filePath)

    print('\r\n---------错误统计------------')
    for item in gErrArray:
        print(item)
    print('\r\n 共%d个错误!' % (len(gErrArray)))
    if gErrArray:
        print("出现错误时,可手动找到错误文件,用notepad++打开后,点击编码,改为utf-8保存")
    print('\r\n-----------------------------')

if __name__ == "__main__":
    # Interactive loop: run one conversion pass, then wait for Enter before
    # prompting for the next folder. There is no exit condition; the user
    # ends the script by interrupting the process.
    while True:
        main()
        input("\r\n########### 按回车键继续转换!!! ###########\r\n")


版权声明:本文为weixin_35990295原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。