2005-03-08
Here's a Python program that converts the character set encoding (from gb18030 to utf-16) of all the .html files in a directory and its subdirectories.
# python
import os

# Root of the directory tree whose .html files are converted.
mydir = '/Users/t/web/p/monkey_king'


def changeEncoding(filePath):
    '''Re-encode the file at filePath in place, from gb18030 to utf-16.

    The path is printed, the whole file is read and decoded as gb18030,
    the utf-16 result is written to a temporary sibling file
    (filePath + '~-~'), and that file is then renamed over the original.

    Raises UnicodeDecodeError if the file is not valid gb18030, and
    IOError/OSError if reading, writing or renaming fails.
    '''
    print(filePath)
    tempName = filePath + '~-~'

    # "with" closes the handle even if reading or decoding raises.
    with open(filePath, 'rb') as source:
        content = source.read().decode('gb18030')

    # Encode before creating the temp file, so an encoding failure
    # cannot leave a stray temp file behind.
    encoded = content.encode('utf-16')

    with open(tempName, 'wb') as output:
        output.write(encoded)

    # The original is only replaced after the new content is fully written.
    os.rename(tempName, filePath)


def myfun(dummy, dirr, filess):
    '''Convert every regular file named *.html among filess inside dirr.

    dummy is unused; it is kept so the function retains the
    (arg, dirname, names) callback signature it was written with.
    dirr is the directory path and filess the entry names inside it.
    '''
    for child in filess:
        childPath = os.path.join(dirr, child)
        # isfile() skips directories that happen to be named "*.html".
        if os.path.splitext(child)[1] == '.html' and os.path.isfile(childPath):
            changeEncoding(childPath)


# os.walk replaces os.path.walk, which no longer exists in Python 3.
# Directory and file names are passed together, as os.path.walk did;
# myfun filters out the non-files itself.
for dirPath, dirNames, fileNames in os.walk(mydir):
    myfun('dumb', dirPath, dirNames + fileNames)
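See here for an example of an html file encoded as utf-8 (as a result of this script; note that the code above writes utf-16, so use 'utf-8' as the target encoding to get this result): http://xahlee.org/p/monkey_king/monkey_king.html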
See also:
