前幾天做了個報名秒殺功能,用戶需要輸入:
        姓,名,姓拼音,名拼音
中間使用到了 pinyin4j,記錄一下。
導入pinyin4j-2.5.0.jar創建 Pinyin4jUtil。以下是具體代碼
package com.kewei.framework.common;import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;import com.alibaba.fastjson.JSON;import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;public class Pinyin4jUtil { ?/**?* 漢字轉換位漢語拼音首字母,英文字符不變,特殊字符丟失 支持多音字,生成方式如(長沙市長:cssc,zssz,zssc,cssz)?* ?* @param chines?* ? ? ? ? ? ?漢字?* @return 拼音?*/ ?private static Map<String, List<String>> pinyinMap = new HashMap<String, List<String>>(); ?static{//initPinyin("/config/duoyinzi_dic.txt"); ?}public static String converterToFirstSpell(String chines) { ?StringBuffer pinyinName = new StringBuffer(); ?char[] nameChar = chines.toCharArray(); ?HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); ?defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); ?defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); ?for (int i = 0; i < nameChar.length; i++) { ?if (nameChar[i] > 128) { ?try { ?// 取得當前漢字的所有全拼 ?String[] strs = PinyinHelper.toHanyuPinyinStringArray( ?nameChar[i], defaultFormat); ?if (strs != null) { ?for (int j = 0; j < strs.length; j++) { ?// 取首字母 ?pinyinName.append(strs[j].charAt(0)); ?if (j != strs.length - 1) { ?pinyinName.append(","); ?} ?} ?} ?// else { ?// pinyinName.append(nameChar[i]); ?// } ?} catch (BadHanyuPinyinOutputFormatCombination e) { ?e.printStackTrace(); ?} ?} else { ?pinyinName.append(nameChar[i]); ?} ?pinyinName.append(" "); ?} ?// return pinyinName.toString(); ?return parseTheChineseByObject(discountTheChinese(pinyinName.toString())); ?} ?/**?* 漢字轉換位漢語全拼,英文字符不變,特殊字符丟失?* 支持多音字,生成方式如(重當參:zhongdangcen,zhongdangcan,chongdangcen?* ,chongdangshen,zhongdangshen,chongdangcan)?* ?* @param chines?* ? ? ? ? ? 
?漢字?* @return 拼音?*/ ?public static String converterToSpell(String chines) { ?StringBuffer pinyinName = new StringBuffer(); ?char[] nameChar = chines.toCharArray(); ?HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); ?defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); ?defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); ?for (int i = 0; i < nameChar.length; i++) { ?if (nameChar[i] > 128) { ?try { ?// 取得當前漢字的所有全拼 ?String[] strs = PinyinHelper.toHanyuPinyinStringArray( ?nameChar[i], defaultFormat); ?if (strs != null) { ?for (int j = 0; j < strs.length; j++) { ?pinyinName.append(strs[j]); ?if (j != strs.length - 1) { ?pinyinName.append(","); ?} ?} ?} ?} catch (BadHanyuPinyinOutputFormatCombination e) { ?e.printStackTrace(); ?} ?} else { ?pinyinName.append(nameChar[i]); ?} ?pinyinName.append(" "); ?} ?// return pinyinName.toString(); ?return parseTheChineseByObject(discountTheChinese(pinyinName.toString())); ?} ?/**?* 去除多音字重復數據?* ?* @param theStr?* @return?*/ ?private static List<Map<String, Integer>> discountTheChinese(String theStr) { ?// 去除重復拼音后的拼音列表 ?List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>(); ?// 用于處理每個字的多音字,去掉重復 ?Map<String, Integer> onlyOne = null; ?String[] firsts = theStr.split(" "); ?// 讀出每個漢字的拼音 ?for (String str : firsts) { ?onlyOne = new Hashtable<String, Integer>(); ?String[] china = str.split(","); ?// 多音字處理 ?for (String s : china) { ?Integer count = onlyOne.get(s); ?if (count == null) { ?onlyOne.put(s, new Integer(1)); ?} else { ?onlyOne.remove(s); ?count++; ?onlyOne.put(s, count); ?} ?} ?mapList.add(onlyOne); ?} ?return mapList; ?} ?/**?* 解析并組合拼音,對象合并方案(推薦使用)?* ?* @return?*/ ?private static String parseTheChineseByObject( ?List<Map<String, Integer>> list) { ?Map<String, Integer> first = null; // 用于統計每一次,集合組合數據 ?// 遍歷每一組集合 ?for (int i = 0; i < list.size(); i++) { ?// 每一組集合與上一次組合的Map ?Map<String, Integer> temp = new Hashtable<String, Integer>(); ?// 第一次循環,first為空 ?if (first != null) { ?// 
取出上次組合與此次集合的字符,并保存 ?for (String s : first.keySet()) { ?for (String s1 : list.get(i).keySet()) { ?String str = s + s1; ?temp.put(str, 1); ?} ?} ?// 清理上一次組合數據 ?if (temp != null && temp.size() > 0) { ?first.clear(); ?} ?} else { ?for (String s : list.get(i).keySet()) { ?String str = s; ?temp.put(str, 1); ?} ?} ?// 保存組合數據以便下次循環使用 ?if (temp != null && temp.size() > 0) { ?first = temp; ?} ?} ?String returnStr = ""; ?if (first != null) { ?// 遍歷取出組合字符串 ?for (String str : first.keySet()) { ?returnStr += (str + ","); ?} ?} ?if (returnStr.length() > 0) { ?returnStr = returnStr.substring(0, returnStr.length() - 1); ?} ?return returnStr; ?} ?/**?* 將某個字符串的首字母 大寫?* @param str?* @return?*/ ?public static String convertInitialToUpperCase(String str){ ?if(str==null){ ?return ""; ?} ?StringBuffer sb = new StringBuffer(); ?char[] arr = str.toCharArray(); ?for(int i=0;i<arr.length;i++){ ?char ch = arr[i]; ?if(i==0){ ?sb.append(String.valueOf(ch).toUpperCase()); ?}else{ ?sb.append(ch); ?} ?} ?return sb.toString(); ?} ?/**?* 獲取字符串的首字母?* @param str?* @return?*/ ?public static String getInitialChar(String str){ ?if(str==null){ ?return ""; ?} ?StringBuffer sb = new StringBuffer(); ?char[] arr = str.toCharArray(); ?if(arr!=null && arr.length>0){sb.append(String.valueOf(arr[0]));}return sb.toString(); ?} ?/**?* 漢字轉拼音 最大匹配優先?* @param chinese?* @return?*/ ?public static String convertChineseToPinyin(String chinese, boolean shortTerm) { ?StringBuffer pinyin = new StringBuffer(); ?HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); ?defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); ?defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); ?char[] arr = chinese.toCharArray(); ?for (int i = 0; i < arr.length; i++) { ?char ch = arr[i]; ?if (ch > 128) { // 非ASCII碼 ?// 取得當前漢字的所有全拼 ?try { ?String[] results = PinyinHelper.toHanyuPinyinStringArray( ?ch, defaultFormat); ?if (results == null) { ?//非中文 ?continue; ?} else { ?int len = results.length; ?if (len == 1) { // 不是多音字 
?// ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[0]); ?String py = results[0]; ? ? ??if(py.contains("u:")){ ?//過濾 u: ?py = py.replace("u:", "v"); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("filter u:"+py); ?} ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}}else if(results[0].equals(results[1])){ ? ?//非多音字 有多個音,取第一個 ?// ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[0]); ?if(shortTerm){pinyin.append(getInitialChar(results[0])); ?}else{pinyin.append(convertInitialToUpperCase(results[0])); ?}}else { // 多音字 ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("多音字:"+ch); ?int length = chinese.length(); ?boolean flag = false; ?String s = null; ?List<String> keyList =null; ?for (int x = 0; x < len; x++) { ?String py = results[x]; ?if(py.contains("u:")){ ?//過濾 u: ?py = py.replace("u:", "v"); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("filter u:"+py); ?} ?keyList = pinyinMap.get(py); ?if (i + 3 <= length) { ? //后向匹配2個漢字 ?大西洋 ??s = chinese.substring(i, i + 3); ?if (keyList != null && (keyList.contains(s))) { ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?if (value != null && value.contains(s)) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("last 2 > " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}flag = true; ?break; ?} ?} ?if (i + 2 <= length) { ? //后向匹配 1個漢字 ?大西 ?s = chinese.substring(i, i + 2); ?if (keyList != null && (keyList.contains(s))) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("last 1 > " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}flag = true; ?break; ?} ?} ?if ((i - 2 >= 0) && (i+1<=length)) { ?// 前向匹配2個漢字 龍固大 ?s = chinese.substring(i - 2, i+1); ?if (keyList != null && (keyList.contains(s))) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("before 2 < " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}flag = true; ?break; ?} ?} ?if ((i - 1 >= 0) && (i+1<=length)) { ?// 前向匹配1個漢字 ? 固大 ?s = chinese.substring(i - 1, i+1); ?if (keyList != null && (keyList.contains(s))) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("before 1 < " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}flag = true; ?break; ?} ?} ?if ((i - 1 >= 0) && (i+2<=length)) { ?//前向1個,后向1個 ? ? ?固大西 ?s = chinese.substring(i - 1, i+2); ?if (keyList != null && (keyList.contains(s))) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("before last 1 <> " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}flag = true; ?break; ?} ?} ?} ?if (!flag) { ? ?//都沒有找到,匹配默認的 讀音 ?大 ??s = String.valueOf(ch); ?boolean found = false;for (int x = 0; x < len; x++) { ?String py = results[x]; ?if(py.contains("u:")){ ?//過濾 u: ?py = py.replace("u:", "v"); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("filter u:"); ?} ?keyList = pinyinMap.get(py); ?if (keyList != null && (keyList.contains(s))) { ?// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?System.out.println("default = " + py); ?
// ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pinyin.append(results[x]); ?//如果不需要拼音首字母大寫 ,直接返回即可 ?if(shortTerm){pinyin.append(getInitialChar(py)); ?}else{pinyin.append(convertInitialToUpperCase(py)); ?}found = true;break; ?} ?} ?if(!found){//取第一個音if(results.length>0){if(shortTerm){pinyin.append(getInitialChar(results[0])); ?}else{pinyin.append(convertInitialToUpperCase(results[0])); ?}}}} ?} ?} ?} catch (BadHanyuPinyinOutputFormatCombination e) { ?e.printStackTrace(); ?} ?} else { ?pinyin.append(arr[i]); ?} ?} ?return pinyin.toString(); ?} ?/**?* 初始化 所有的多音字詞組?* ?* @param fileName?*/ ?public static void initPinyin(String fileName) { ?// 讀取多音字的全部拼音表; ?InputStream file = PinyinHelper.class.getResourceAsStream(fileName); ?BufferedReader br = new BufferedReader(new InputStreamReader(file)); ?String s = null; ?try { ?while ((s = br.readLine()) != null) { ?if (s != null) { ?String[] arr = s.split("#"); ?String pinyin = arr[0]; ?String chinese = arr[1]; ?if(chinese!=null){ ?String[] strs = chinese.split(" "); ?List<String> list = Arrays.asList(strs); ?pinyinMap.put(pinyin, list); ?} ?} ?} ?} catch (IOException e) { ?e.printStackTrace(); ?}finally{ ?try { ?br.close(); ?} catch (IOException e) { ?e.printStackTrace(); ?} ?} ?} ?} ?
main方法測試
public static void main(String[] args){String surName = "JIE";String name = "QIAN";System.out.println(Pinyin4jUtil.converterToSpell("解").toUpperCase().indexOf(surName));System.out.println(Pinyin4jUtil.converterToSpell("茜").toUpperCase().indexOf(name));//驗證是否包含String surName = stuExamInfo.getSurNamePy();String name = stuExamInfo.getNamePy();int pysurnameFlag = 0;String py4jsurNameStr = Pinyin4jUtil.converterToSpell(stuExamInfo.getSurName()).toUpperCase();String[] py4jsurNameArr = py4jsurNameStr.split(",");for (String py4jsurName : py4jsurNameArr) {if(surName.equals(py4jsurName)){pysurnameFlag = 1;break;}}int pynameFlag = 0;String py4jNameStr = Pinyin4jUtil.converterToSpell(stuExamInfo.getName()).toUpperCase();String[] py4jNameArr = py4jNameStr.split(",");for (String py4jName : py4jNameArr) {if(name.equals(py4jName)){pynameFlag = 1;break;}}}?
最開始的時候沒有考慮多音字,直接判斷姓名轉出的拼音字符串是否與輸入的拼音相等。後來修改成了上文的方式:
將字的所有拼音組合和傳入的拼音依次對比。當 pysurnameFlag 與 pynameFlag 都等於 1 時,則輸入正確。
注意:在默認情況下,呂(LV)使用工具類得到的拼音是 LU。
需要使用 defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V); 進行單獨處理。
總結
以上是生活随笔為你收集整理的Pinyin4jUtil 验证姓名与拼音是否一致,支持多音字。的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。