java 离线语音识别
生活随笔
收集整理的這篇文章主要介紹了
java 离线语音识别
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
萬分感謝原創作者
最近在研究 vosk,官網提供的示例是基於 gradle 的;gradle 沒用過,研究了好久也沒搞定。
本文採用的是 Spring Boot + Maven,這樣看起來就輕鬆多了。
核心代碼
pom
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.1.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.example</groupId>
    <artifactId>voice</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>voice-ai</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <!-- Extra repository that hosts the com.alphacephei:vosk artifact declared below. -->
    <repositories>
        <repository>
            <id>com.alphacephei</id>
            <name>vosk</name>
            <url>https://alphacephei.com/maven/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.7.0</version>
        </dependency>
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>0.3.30</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.8</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>

VoiceUtil

package com.vosk.voskai;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
// NOTE: com.sun.media.sound is a JDK-internal package; it is only accessible on Java 8 and earlier.
import com.sun.media.sound.WaveFileReader;
import com.sun.media.sound.WaveFileWriter;
import org.springframework.beans.factory.annotation.Value;
import
org.springframework.stereotype.Component; import org.springframework.util.Assert; import org.springframework.util.StringUtils; import org.vosk.LibVosk; import org.vosk.LogLevel; import org.vosk.Model; import org.vosk.Recognizer;import javax.sound.sampled.AudioFileFormat; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import java.io.*; import java.nio.file.Files; import java.nio.file.Paths;@Component public class VoiceUtil {@Value("${voskmodel}")private String VOSKMODELPATH;public String getWord(String filePath) throws Exception {Assert.isTrue(StringUtils.hasLength(VOSKMODELPATH), "無效的VOS模塊!");byte[] bytes = Files.readAllBytes(Paths.get(filePath));// 轉換為16KHZreSamplingAndSave(bytes, filePath);File f = new File(filePath);RandomAccessFile rdf = null;rdf = new RandomAccessFile(f, "r"); // log.info("聲音尺寸:{}", toInt(read(rdf, 4, 4))); // log.info("音頻格式:{}", toShort(read(rdf, 20, 2)));short track = toShort(read(rdf, 22, 2)); // log.info("1 單聲道 2 雙聲道: {}", track); // log.info("采樣率、音頻采樣級別 16000 = 16KHz: {}", toInt(read(rdf, 24, 4))); // log.info("每秒波形的數據量:{}", toShort(read(rdf, 22, 2))); // log.info("采樣幀的大小:{}", toShort(read(rdf, 32, 2))); // log.info("采樣位數:{}", toShort(read(rdf, 34, 2)));rdf.close();LibVosk.setLogLevel(LogLevel.WARNINGS);try (Model model = new Model(VOSKMODELPATH);InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(filePath)));// 采樣率為音頻采樣率的聲道倍數Recognizer recognizer = new Recognizer(model, 16000 * track)) {int nbytes;byte[] b = new byte[4096];int i = 0;while ((nbytes = ais.read(b)) >= 0) {i += 1;if (recognizer.acceptWaveForm(b, nbytes)) { // System.out.println(recognizer.getResult());} else { // System.out.println(recognizer.getPartialResult());}}String result = recognizer.getFinalResult(); // log.info("識別結果:{}", result);System.out.println("識別結果:{}" + result);if (StringUtils.hasLength(result)) {JSONObject jsonObject = 
JSON.parseObject(result);return jsonObject.getString("text").replace(" ", "");}return "";}}public static int toInt(byte[] b) {return (((b[3] & 0xff) << 24) + ((b[2] & 0xff) << 16) + ((b[1] & 0xff) << 8) + ((b[0] & 0xff) << 0));}public static short toShort(byte[] b) {return (short) ((b[1] << 8) + (b[0] << 0));}public static byte[] read(RandomAccessFile rdf, int pos, int length) throws IOException {rdf.seek(pos);byte result[] = new byte[length];for (int i = 0; i < length; i++) {result[i] = rdf.readByte();}return result;}public static void reSamplingAndSave(byte[] data, String path) throws Exception {WaveFileReader reader = new WaveFileReader();AudioInputStream audioIn = reader.getAudioInputStream(new ByteArrayInputStream(data));AudioFormat srcFormat = audioIn.getFormat();int targetSampleRate = 16000;AudioFormat dstFormat = new AudioFormat(srcFormat.getEncoding(),targetSampleRate,srcFormat.getSampleSizeInBits(),srcFormat.getChannels(),srcFormat.getFrameSize(),srcFormat.getFrameRate(),srcFormat.isBigEndian());AudioInputStream convertedIn = AudioSystem.getAudioInputStream(dstFormat, audioIn);File file = new File(path);WaveFileWriter writer = new WaveFileWriter();writer.write(convertedIn, AudioFileFormat.Type.WAVE, file);} }Controller
@GetMapping("/getWordG")public String getWordG() {String path = "D:\\mp3\\11.wav";try {// 上傳成功,開始解析String text = voiceUtil.getWord(path);return text;} catch (Exception e) {e.printStackTrace();return "上傳失敗";}}最后,項目我上傳了 大家可以直接下載 vosk的model也下載好了(中文的,需要英文的自己去vosk的網站下載)。vosk-ai.rar-Java文檔類資源-CSDN文庫https://download.csdn.net/download/gb617305413/34847847
總結
以上是生活随笔為你收集整理的java 离线语音识别的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 算法第四版 练习答案 1.4.1
- 下一篇: 赠书 | 原动力——改变未来世界的5大核