Java pinyin4j 汉字转拼音包括——多音字
生活随笔
收集整理的這篇文章主要介紹了
Java pinyin4j 汉字转拼音包括——多音字
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
Java漢字轉拼音(包括多音字)
有個需求需要把漢字轉拼音,我的小伙伴推薦用Unicode官方的包;下載有些慢。
實際中用了Java工具包:pinyin4j解決
可以轉漢字,多音字,多音字的地方要求不太準確的,可以直接取列表的第一個值;
1. Maven依賴
<!--漢字轉拼音-->
<dependency>
    <groupId>com.belerweb</groupId>
    <artifactId>pinyin4j</artifactId>
    <version>2.5.1</version>
</dependency>
2. Java示例代碼
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;import java.util.*;/************************************** Class Name: PinyinUtil* Description:〈拼音轉(zhuǎn)換工具類〉* @author smn* @create 2020/8/11* @since 1.0.0************************************/
@Slf4j
public class PinyinUtil {/*** 漢字轉(zhuǎn)拼音(全拼)** @param src* @return*/public static String getPinyin(String src) {char[] srcCharArray = src.toCharArray();// 設(shè)置漢字拼音輸出的格式HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);StringBuffer result = new StringBuffer();for (int i = 0; i < srcCharArray.length; i++) {String[] tempArray = null;// 判斷能否為漢字字符if (Character.toString(srcCharArray[i]).matches("[\\u4E00-\\u9FA5]+")) {try {tempArray = PinyinHelper.toHanyuPinyinStringArray(srcCharArray[i], outputFormat);} catch (BadHanyuPinyinOutputFormatCombination e) {log.error("處理" + src + "出錯(cuò)", e);}// 將漢字的幾種全拼都存到數(shù)組中result.append(tempArray[0]);} else {// 如果不是漢字字符,間接取出字符并連接到字符串后result.append(Character.toString(srcCharArray[i]));}result.append(" ");}return result.toString().trim();}/*** 返回不帶空格分割的全拼** @param src* @return*/public static String getPinyinWithoutBlank(String src) {return getPinyin(src).replaceAll(" ", "");}/*** 返回多音字的全部拼音(不區(qū)分聲調(diào))** @param src* @return*/public static List<String> getMultiplePronounciationsWithoutTone(String src) {List<String> dstPinyinList = new ArrayList<String>();List<String> tempPinyinList = new ArrayList<String>();String[] curCharPinyin = null;Set<String> curPinyinSet = null;// 設(shè)置漢字拼音輸出的格式HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);char[] srcCharArray = src.toCharArray();for (char curChar : srcCharArray) {// 判斷能否為漢字字符if (Character.toString(curChar).getBytes().length != Character.toString(curChar).length()) {if (Character.toString(curChar).matches("[\\u4E00-\\u9FA5]+")) {try {curCharPinyin = PinyinHelper.toHanyuPinyinStringArray(curChar, 
outputFormat);if (null == curCharPinyin) {log.error("[" + Character.toString(curChar) + "]字轉(zhuǎn)換拼音失敗:轉(zhuǎn)換結(jié)果為空!");return null;}// 集合用于去除聲調(diào)不同的重復(fù)拼音curPinyinSet = new HashSet<String>();for (int i = 0; i < curCharPinyin.length; i++) {if (!curPinyinSet.contains(curCharPinyin)) {curPinyinSet.add(curCharPinyin[i]);} else {continue;}}} catch (BadHanyuPinyinOutputFormatCombination e) {log.error("[" + Character.toString(curChar) + "]字轉(zhuǎn)換拼音失敗:" + e.getMessage(), e);return null;}} else {// 不在Unicode漢字編碼內(nèi)的字符,返回nulllog.error("[" + Character.toString(curChar) + "]字轉(zhuǎn)換拼音失敗:轉(zhuǎn)換結(jié)果為空!");return null;}} else {// 如果不是漢字字符,則直接將字符放入當(dāng)前字符拼音集合curPinyinSet = new HashSet<String>();curPinyinSet.add(Character.toString(curChar));}// 進(jìn)行輸出拼音字串拼接Iterator<String> iter = null;if (dstPinyinList.size() == 0) {iter = curPinyinSet.iterator();while (iter.hasNext()) {String curPinyin = (String) iter.next();dstPinyinList.add(curPinyin);}} else {for (String dstPinyin : dstPinyinList) {iter = curPinyinSet.iterator();while (iter.hasNext()) {String curPinyin = (String) iter.next();tempPinyinList.add(dstPinyin + " " + curPinyin);}}dstPinyinList.clear();dstPinyinList.addAll(tempPinyinList);tempPinyinList.clear();}}return dstPinyinList;}public static List<String> getMultiplePronounciationsWithoutToneWithoutBlank(String src) {List<String> result = new ArrayList<String>();List<String> data = getMultiplePronounciationsWithoutTone(src);for (String str : data) {result.add(str.replaceAll(" ", ""));}return result;}public static List<String> getJianPin(String src) {List<String> result = new ArrayList<String>();List<String> tempList = getMultiplePronounciationsWithoutTone(src);StringBuffer sb = new StringBuffer();for (String str : tempList) {String[] array = str.split(" ");for (String s : array) {sb.append(s.charAt(0));}result.add(sb.toString().toLowerCase());sb.delete(0, sb.length());}return result;}public static void main(String[] args) {String str = 
"滿地都是六便士";System.out.println(PinyinUtil.getPinyin(str));System.out.println(PinyinUtil.getPinyinWithoutBlank(str));System.out.println(PinyinUtil.getMultiplePronounciationsWithoutTone(str));System.out.println(PinyinUtil.getMultiplePronounciationsWithoutToneWithoutBlank(str));System.out.println(PinyinUtil.getJianPin(str));log.info(" " + PinyinUtil.getJianPin("1597219781375_1.大型互聯(lián)網(wǎng)系統(tǒng)架構(gòu)演進(jìn)之路.mp4"));}
}
3. 結果:
參考:
- https://blog.csdn.net/jiaotuwoaini/article/details/52585810
總結
以上是生活随笔為你收集整理的Java pinyin4j 汉字转拼音包括——多音字的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: ES强制删除docs.deleted 标
- 下一篇: 通过h5页面上传视频到Linux服务器