处理UTF-8编码的不连续的字节流
生活随笔
收集整理的這篇文章主要介紹了
处理UTF-8编码的不连续的字节流
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
package com.util;import java.io.FileInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;/*** 處理UTF-8編碼的不連續的字節流* @author Administrator**/
/**
 * Decodes a UTF-8 byte stream that arrives in arbitrary, non-contiguous chunks.
 *
 * <p>A chunk boundary may fall in the middle of a multi-byte character. Each call to
 * {@link #getString(byte[])} decodes every complete character available so far and keeps
 * the bytes of a trailing, not-yet-complete character in an internal buffer; those bytes
 * are prepended to the next chunk. An instance therefore carries state between calls and
 * must be used for one stream at a time.
 */
public class HandlerUTF8 {

    /** Longest byte sequence of the original UTF-8 scheme (see {@link #incompleteTailLength}). */
    private static final int MAX_SEQUENCE_LENGTH = 6;

    /** Bytes of a trailing character that was cut off at the end of the previous chunk. */
    private final byte[] cacheByte = new byte[MAX_SEQUENCE_LENGTH];

    /** Number of valid bytes currently held in {@link #cacheByte}. */
    private int cacheCount = 0;

    /**
     * Decodes the next chunk of UTF-8 bytes.
     *
     * @param buf the next chunk of the stream; must not be {@code null}, may be empty
     * @return the text of all characters completed by this chunk (possibly empty)
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause
     *         is retained so that existing callers which catch it keep compiling
     */
    public String getString(byte[] buf) throws UnsupportedEncodingException {
        final byte[] source;
        if (cacheCount > 0) {
            // Prepend the bytes left over from the previous chunk.
            source = new byte[cacheCount + buf.length];
            System.arraycopy(cacheByte, 0, source, 0, cacheCount);
            System.arraycopy(buf, 0, source, cacheCount, buf.length);
        } else {
            source = buf;
        }

        cacheCount = incompleteTailLength(source);
        if (cacheCount > 0) {
            // At most MAX_SEQUENCE_LENGTH - 1 bytes are ever held back, so they always fit.
            System.arraycopy(source, source.length - cacheCount, cacheByte, 0, cacheCount);
        }
        return new String(source, 0, source.length - cacheCount,
                java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Returns how many bytes at the end of {@code bytes} belong to a character whose
     * remaining bytes have not arrived yet.
     *
     * <p>Only the last {@value #MAX_SEQUENCE_LENGTH} bytes are inspected, following the
     * original UTF-8 layout:
     * <pre>
     * U-00000000 - U-0000007F: 0xxxxxxx
     * U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     * U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     * U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * </pre>
     *
     * <p>The scan runs backward from the end to the last byte that is not a continuation
     * byte. Only that byte can start an unfinished character, so an earlier stray lead
     * byte never causes already-complete bytes after it to be held back.
     *
     * @param bytes the bytes to inspect
     * @return the length of the incomplete trailing sequence, or 0 if there is none
     */
    private static int incompleteTailLength(byte[] bytes) {
        final int end = bytes.length;
        final int windowStart = Math.max(0, end - MAX_SEQUENCE_LENGTH);
        for (int i = end - 1; i >= windowStart; i--) {
            final int unsigned = bytes[i] & 0xFF;
            if ((unsigned & 0xC0) == 0x80) {
                continue; // 10xxxxxx: continuation byte, keep looking for its lead byte
            }
            final int available = end - i;
            return available < sequenceLength(unsigned) ? available : 0;
        }
        // Nothing but continuation bytes in the window: no lead byte to wait for.
        return 0;
    }

    /**
     * Returns the total sequence length announced by a lead byte.
     *
     * @param leadByte the byte value in the range 0..255
     * @return 2 to 6 for a multi-byte lead byte, otherwise 1
     */
    private static int sequenceLength(int leadByte) {
        if (leadByte >> 5 == 0x06) {
            return 2;
        }
        if (leadByte >> 4 == 0x0E) {
            return 3;
        }
        if (leadByte >> 3 == 0x1E) {
            return 4;
        }
        if (leadByte >> 2 == 0x3E) {
            return 5;
        }
        if (leadByte >> 1 == 0x7E) {
            return 6;
        }
        return 1;
    }

    /**
     * Demonstrates decoding a sample string whose final three-byte character is split
     * across two chunks.
     *
     * @param args unused
     * @throws Exception if decoding fails
     */
    public static void main(String[] args) throws Exception {
        // Complete sample, as produced by getBytes("utf-8") on the test string:
        // {-26,-75,-117,-24,-81,-107,85,84,70,45,56,-27,-83,-105,-25,-84,-90,-28,-72,-78}
        byte[] buf1 = {-26, -75, -117, -24, -81, -107, 85, 84, 70, 45, 56,
                -27, -83, -105, -25, -84, -90, -28};
        byte[] buf2 = {-72, -78};

        HandlerUTF8 handlerUTF8 = new HandlerUTF8();
        String str = handlerUTF8.getString(buf1);
        System.out.print(str);
        String str2 = handlerUTF8.getString(buf2);
        System.out.print(str2);
    }
}
?
轉載于:https://www.cnblogs.com/xuruhong/p/3278805.html
總結
以上是生活随笔為你收集整理的处理UTF-8编码的不连续的字节流的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: putty连接linux as5 输入密
- 下一篇: 基于Mahout的电影推荐系统