Java 文件(File)字符集:获取文件字符集(或文件编码)的工具类
package org.mozilla.intl.chardet;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
*?借助JCharDet獲取文件字符集
*?@author?icer
*?PS:
*?JCharDet?是mozilla自動字符集探測算法代碼的java移植,其官方主頁為:
*??????http://jchardet.sourceforge.net/
*?@date????2008/11/13
*/
publicclassFileCharsetDetector?{
privatebooleanfound?=false;
/**
*?如果完全匹配某個字符集檢測算法,?則該屬性保存該字符集的名稱.?否則(如二進制文件)其值就為默認值?null,?這時應當查詢屬性
*/
privateString?encoding?=null;
publicstaticvoidmain(String[]?argv)throwsException?{
if(argv.length?!=1&&?argv.length?!=2)?{
System.out
.println("Usage:?FileCharsetDetector??[]");
System.out.println("");
System.out.println("Where??is?d:/demo.txt");
System.out.println("For?optional?.?Use?following...");
System.out.println("????????1?=>?Japanese");
System.out.println("????????2?=>?Chinese");
System.out.println("????????3?=>?Simplified?Chinese");
System.out.println("????????4?=>?Traditional?Chinese");
System.out.println("????????5?=>?Korean");
System.out.println("????????6?=>?Dont?know?(default)");
return;
}?else{
String?encoding?=?null;
if(argv.length?==2)?{
encoding?=?newFileCharsetDetector().guestFileEncoding(argv[0],
Integer.valueOf(argv[1]));
}?else{
encoding?=?newFileCharsetDetector().guestFileEncoding(argv[0]);
}
System.out.println("文件編碼:"+?encoding);
}
}
/**
*?傳入一個文件(File)對象,檢查文件編碼
*
*?@param?file
*????????????File對象實例
*?@return?文件編碼,若無,則返回null
*?@throws?FileNotFoundException
*?@throws?IOException
*/
publicString?guestFileEncoding(File?file)throwsFileNotFoundException,
IOException?{
returngeestFileEncoding(file,newnsDetector());
}
/**
*?獲取文件的編碼
*
*?@param?file
*????????????File對象實例
*?@param?languageHint
*????????????語言提示區域代碼?eg:1?:?Japanese;?2?:?Chinese;?3?:?Simplified?Chinese;
*????????????4?:?Traditional?Chinese;?5?:?Korean;?6?:?Dont?know?(default)
*?@return?文件編碼,eg:UTF-8,GBK,GB2312形式,若無,則返回null
*?@throws?FileNotFoundException
*?@throws?IOException
*/
publicString?guestFileEncoding(File?file,intlanguageHint)
throwsFileNotFoundException,?IOException?{
returngeestFileEncoding(file,newnsDetector(languageHint));
}
/**
*?獲取文件的編碼
*
*?@param?path
*????????????文件路徑
*?@return?文件編碼,eg:UTF-8,GBK,GB2312形式,若無,則返回null
*?@throws?FileNotFoundException
*?@throws?IOException
*/
publicString?guestFileEncoding(String?path)throwsFileNotFoundException,
IOException?{
returnguestFileEncoding(newFile(path));
}
/**
*?獲取文件的編碼
*
*?@param?path
*????????????文件路徑
*?@param?languageHint
*????????????語言提示區域代碼?eg:1?:?Japanese;?2?:?Chinese;?3?:?Simplified?Chinese;
*????????????4?:?Traditional?Chinese;?5?:?Korean;?6?:?Dont?know?(default)
*?@return
*?@throws?FileNotFoundException
*?@throws?IOException
*/
publicString?guestFileEncoding(String?path,intlanguageHint)
throwsFileNotFoundException,?IOException?{
returnguestFileEncoding(newFile(path),?languageHint);
}
/**
*?獲取文件的編碼
*
*?@param?file
*?@param?det
*?@return
*?@throws?FileNotFoundException
*?@throws?IOException
*/
privateString?geestFileEncoding(File?file,?nsDetector?det)
throwsFileNotFoundException,?IOException?{
//?Set?an?observer...
//?The?Notify()?will?be?called?when?a?matching?charset?is?found.
det.Init(newnsICharsetDetectionObserver()?{
publicvoidNotify(String?charset)?{
found?=?true;
encoding?=?charset;
}
});
BufferedInputStream?imp?=?newBufferedInputStream(newFileInputStream(
file));
byte[]?buf?=newbyte[1024];
intlen;
booleandone?=false;
booleanisAscii?=true;
while((len?=?imp.read(buf,0,?buf.length))?!=?-1)?{
//?Check?if?the?stream?is?only?ascii.
if(isAscii)
isAscii?=?det.isAscii(buf,?len);
//?DoIt?if?non-ascii?and?not?done?yet.
if(!isAscii?&&?!done)
done?=?det.DoIt(buf,?len,?false);
}
det.DataEnd();
if(isAscii)?{
encoding?=?"ASCII";
found?=?true;
}
if(!found)?{
String?prob[]?=?det.getProbableCharsets();
if(prob.length?>0)?{
//?在沒有發現情況下,則取第一個可能的編碼
encoding?=?prob[0];
}?else{
returnnull;
}
}
returnencoding;
}
}
網上有一些介紹 JCharDet 用法的文章,但實際使用時發現還是有一些問題,於是參照官方示例自己寫了一個。使用起來感覺不錯。
附件為jchardet包。
總結
以上是生活随笔為你收集整理的java文件file字符集_获取文件字符集(或文件编码) 的工具类的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: java月份去0_java – 使用月
- 下一篇: java自定义配置文件_自定义配置文件如