SSM环境下java如何实现语音识别（百度语音识别版）-白红宇

SSM环境下java如何实现语音识别（百度语音识别版）

阅读量：3890 次

发布时间：2019-05-23

本文共 13259 字，大约阅读时间需要 44 分钟。

本项目是使用SSM作为基础结构，MAVEN作为jar包管理。

项目实现前提：PC端、电脑具有录音麦克风。

第一步：在MAVEN中导入语音识别jar包


       
    
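<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.8.0</version>
</dependency>
<dependency>
    <groupId>com.googlecode.soundlibs</groupId>
    <artifactId>mp3spi</artifactId>
    <version>1.9.5.4</version>
</dependency>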

第二步：做一个HTML来进行语音识别前台：

<%@ page language="java" contentType="text/html; charset=UTF-8"    pageEncoding="UTF-8"%>
   Insert title here

第三步：在Spring MVC中创建开始录音和结束录音两个controller。开始录音的controller用于在Java中读取本机（笔记本）的麦克风，实现持续录音功能；结束录音的controller用于结束录音状态并保存录音，然后将录音文件转换为可被百度接口识别的.pcm音频文件，再调用百度接口完成真正的语音识别，并返回识别内容。

@Resource(name="myRecord")	private MyRecord mr;	@RequestMapping("voiceBegin")	public String voiceBegin(String flag) {		mr.capture();		return "jsp/voice";	}	@ResponseBody	@RequestMapping(value="voiceEnd",produces="text/html;charset=UTF-8")	public String voiceEnd(String flag,HttpServletRequest request) {		String word=null;		//关闭录音		mr.setStopflag(true);		//获得项目下的路径（到webapp的位置）		String basepath=request.getSession().getServletContext().getRealPath("");		//保存mp3文件		String mp3path=mr.save(basepath);		String pcmpath=basepath+"static/pcm/a1.pcm";		try {			System.out.println(mp3path);			//将mp3文件转换成pcm文件，并且保存在项目中			MP3ConvertPCM.mp3Convertpcm(mp3path,pcmpath);			word=Sample.beginSample(pcmpath);		} catch (Exception e) {			// TODO 自动生成的 catch 块			e.printStackTrace();		}		return word;	}

具体的业务逻辑过程就是以上三个步骤。接下来是controller里面调用的三个主要实现语音识别的类的代码。这三个类都是我根据在网上找到的方法整理而成的，仅供学习使用。

首先，MyRecord类实现了四个功能：

1.java读取笔记本麦克风，并进行录音，录音过程是使用一个线程来进行持续读入，然后将读入的内容放在一个ByteArrayOutputStream流中。

2.实现了停止录音状态的结束录音功能。

3.播放语音功能。

4.保存录音功能。

具体代码如下：

package com.tumao.otherService;

import java.io.*;
import javax.sound.sampled.*;
import org.springframework.stereotype.Service;

/**
 * Records audio from a capture line into memory and can play the recording
 * back or save it to a file.
 *
 * <p>Recording runs on a background thread started by {@link #capture()} and
 * continues until the stop flag is set via {@link #stop()} or
 * {@link #setStopflag(Boolean)}. The captured bytes are accumulated in
 * {@link #baos}.
 *
 * <p>Note: {@link #save(String)} names the file {@code *.mp3}, but the data is
 * written with {@code AudioFileFormat.Type.WAVE}.
 *
 * @author Administrator
 */
@Service("myRecord")
public class MyRecord {

	/** Maximum time {@link #save(String)} waits for the recorder thread to finish after a stop request. */
	private static final long STOP_WAIT_MILLIS = 2000L;

	// Audio format used for capture, playback and saving.
	AudioFormat af = null;
	// Target data line: the line audio data is captured (read) from.
	TargetDataLine td = null;
	// Source data line: the line audio data is written to for playback.
	SourceDataLine sd = null;
	// In-memory input/output streams holding the recorded bytes.
	ByteArrayInputStream bais = null;
	ByteArrayOutputStream baos = null;
	// Audio stream wrapping the recorded bytes.
	AudioInputStream ais = null;
	// Stop flag controlling the recorder thread. It is volatile because it is
	// written by the caller's thread and read by the recorder thread.
	volatile Boolean stopflag = false;
	// The currently running recorder thread, if any.
	private volatile Thread recordThread = null;

	/**
	 * Opens the capture line and starts the background recorder thread.
	 * If the line is unavailable the stack trace is printed and nothing is started.
	 */
	public void capture() {
		try {
			af = getAudioFormat();
			DataLine.Info info = new DataLine.Info(TargetDataLine.class, af);
			td = (TargetDataLine) (AudioSystem.getLine(info));
			// Open the line with the given format so it acquires its resources.
			td.open(af);
			// Allow the line to perform data I/O.
			td.start();

			// Reset the flag and create the buffer BEFORE the thread starts, so
			// a stop request arriving before the thread runs is not lost and
			// save() never sees a null buffer after a successful capture().
			stopflag = false;
			baos = new ByteArrayOutputStream();

			Thread t1 = new Thread(new Record());
			recordThread = t1;
			t1.start();
		} catch (LineUnavailableException ex) {
			ex.printStackTrace();
		}
	}

	/** Requests the recorder thread to stop. */
	public void stop() {
		stopflag = true;
	}

	/**
	 * Plays the recorded audio on a background thread.
	 * Does nothing when no audio has been recorded yet.
	 */
	public void play() {
		if (baos == null) {
			return;
		}
		byte audioData[] = baos.toByteArray();
		bais = new ByteArrayInputStream(audioData);
		af = getAudioFormat();
		ais = new AudioInputStream(bais, af, audioData.length / af.getFrameSize());

		try {
			DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, af);
			sd = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
			sd.open(af);
			sd.start();
			// The playback thread reads from ais and closes the streams when it
			// is done; they must not be closed here while it is still reading.
			Thread t2 = new Thread(new Play());
			t2.start();
		} catch (Exception e) {
			e.printStackTrace();
			// Playback never started, so release the streams here.
			closeAndLog(ais);
			closeAndLog(bais);
		}
	}

	/**
	 * Saves the recorded audio under {@code basepath + "static/MP3/"}.
	 *
	 * <p>If a stop has been requested, this first waits briefly for the
	 * recorder thread to finish so the last captured chunk is included.
	 *
	 * @param basepath base directory path; it is concatenated directly with
	 *                 {@code "static/MP3/"}, so it is expected to end with a path separator
	 * @return the path of the written file, or {@code null} if nothing has
	 *         been recorded or writing failed
	 */
	public String save(String basepath) {
		if (Boolean.TRUE.equals(stopflag)) {
			awaitRecorder();
		}
		if (baos == null) {
			// capture() was never (successfully) called: nothing to save.
			return null;
		}

		af = getAudioFormat();
		byte audioData[] = baos.toByteArray();
		bais = new ByteArrayInputStream(audioData);
		ais = new AudioInputStream(bais, af, audioData.length / af.getFrameSize());

		try {
			// Compute the timestamped name ONCE so the returned path is the
			// file that was actually written.
			String path = basepath + "static/MP3/" + System.currentTimeMillis() + ".mp3";
			File file = new File(path);
			File dir = file.getParentFile();
			if (dir != null && !dir.exists()) {
				// Create the whole target directory chain, not just basepath.
				dir.mkdirs();
			}
			System.out.println(path);
			AudioSystem.write(ais, AudioFileFormat.Type.WAVE, file);
			return path;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndLog(bais);
			closeAndLog(ais);
		}
		return null;
	}

	/**
	 * Builds the audio format used by this recorder: signed PCM, 8000 Hz,
	 * 16-bit, mono, big-endian.
	 *
	 * @return the audio format
	 */
	public AudioFormat getAudioFormat() {
		AudioFormat.Encoding encoding = AudioFormat.Encoding.PCM_SIGNED;
		float rate = 8000f;
		int sampleSize = 16;
		boolean bigEndian = true;
		int channels = 1;
		return new AudioFormat(encoding, rate, sampleSize, channels,
				(sampleSize / 8) * channels, rate, bigEndian);
	}

	/** Waits (bounded) for the recorder thread to terminate. */
	private void awaitRecorder() {
		Thread recorder = recordThread;
		if (recorder == null || recorder == Thread.currentThread()) {
			return;
		}
		try {
			recorder.join(STOP_WAIT_MILLIS);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
		}
	}

	/** Closes a stream if non-null, printing any I/O error instead of propagating it. */
	private static void closeAndLog(Closeable stream) {
		if (stream == null) {
			return;
		}
		try {
			stream.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Recorder task. An inner class because it uses the enclosing
	 * {@code MyRecord}'s line, buffer and stop flag.
	 */
	class Record implements Runnable {
		// Buffer for one chunk of captured audio.
		byte bts[] = new byte[10000];

		public void run() {
			if (baos == null) {
				baos = new ByteArrayOutputStream();
			}
			// Work on snapshots so this run keeps using its own line and buffer.
			final TargetDataLine line = td;
			final ByteArrayOutputStream sink = baos;
			try {
				System.out.println("ok3");
				// Keep reading until a stop is requested.
				while (!Boolean.TRUE.equals(stopflag)) {
					// cnt is the number of bytes actually read.
					int cnt = line.read(bts, 0, bts.length);
					if (cnt > 0) {
						sink.write(bts, 0, cnt);
					}
				}
			} catch (Exception e) {
				e.printStackTrace();
			} finally {
				closeAndLog(sink);
				if (line != null) {
					// No drain() here: draining a stopped line that still has
					// queued data can block indefinitely.
					line.stop();
					line.close();
				}
			}
		}
	}

	/** Playback task; also an inner class. Plays the data wrapped by {@code ais}. */
	class Play implements Runnable {
		public void run() {
			byte bts[] = new byte[10000];
			try {
				int cnt;
				// Copy the audio data to the playback line chunk by chunk.
				while ((cnt = ais.read(bts, 0, bts.length)) != -1) {
					if (cnt > 0) {
						sd.write(bts, 0, cnt);
					}
				}
			} catch (Exception e) {
				e.printStackTrace();
			} finally {
				sd.drain();
				sd.close();
				closeAndLog(ais);
				closeAndLog(bais);
			}
		}
	}

	public Boolean getStopflag() {
		return stopflag;
	}

	public void setStopflag(Boolean stopflag) {
		this.stopflag = stopflag;
	}
}

其次，是转换音频格式的MP3ConvertPCM类，用于将.mp3录音文件转换成能够进行语音识别的.pcm文件，具体代码如下

package com.tumao.otherService;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

/**
 * Converts an audio file to raw signed PCM using the Java Sound API.
 *
 * <p>NOTE(review): decoding real MP3 input presumably relies on an MP3
 * service provider being on the classpath - confirm against the project's
 * dependencies.
 *
 * @author 小帅丶
 * @date 2017-12-06
 */
public class MP3ConvertPCM {

	/**
	 * Reads the audio file at {@code mp3filepath}, converts it to
	 * {@code PCM_SIGNED} encoding and writes the raw sample bytes to
	 * {@code pcmfilepath}.
	 *
	 * @param mp3filepath path of the source audio file
	 * @param pcmfilepath path the PCM data is written to; the parent directory
	 *                    is created if it does not exist
	 * @throws Exception if the source cannot be read or decoded, or the
	 *                   target cannot be written
	 */
	public static void mp3Convertpcm(String mp3filepath, String pcmfilepath) throws Exception {
		File mp3 = new File(mp3filepath);
		File pcm = new File(pcmfilepath);

		// FileOutputStream does not create missing directories.
		File targetDir = pcm.getParentFile();
		if (targetDir != null && !targetDir.exists()) {
			targetDir.mkdirs();
		}

		// try-with-resources closes all three streams even when decoding or
		// writing fails part-way through.
		try (AudioInputStream sourceStream = AudioSystem.getAudioInputStream(mp3);
				AudioInputStream pcmStream =
						AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, sourceStream);
				OutputStream os = new FileOutputStream(pcm)) {
			byte[] buffer = new byte[8192];
			int bytesRead;
			while ((bytesRead = pcmStream.read(buffer, 0, buffer.length)) != -1) {
				os.write(buffer, 0, bytesRead);
			}
		}
	}
}

最后，是读取百度API，实现语音识别，并且返回一个字符串word，该word即读取结果。

package com.tumao.otherService;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import javax.xml.bind.DatatypeConverter;

import org.json.JSONObject;

/**
 * Client for the Baidu speech recognition HTTP interface: uploads a PCM audio
 * file and returns the recognised text.
 *
 * @author Administrator
 */
public class Sample {

    private static final String serverURL = "http://vop.baidu.com/server_api";
    private static String token = "";

    // Put your own parameters here.
    // NOTE(review): credentials are hard-coded in source; consider loading
    // them from configuration instead of committing them.
    // apiKey / secretKey: the values shown on the application card after registering the app.
    private static final String apiKey = "nYe9EaMPwybjcGu8GZZzcygf";
    private static final String secretKey = "AOc63YWkqyqoOEfhdBxpCIdjKIEKlvdY";
    // cuid: unique device identifier; on a PC the original author used the NIC's MAC address.
    private static final String cuid = "36-E6-AD-3F-F6-88";

    public static void main(String[] args) throws Exception {
        getToken();
        method2("F:\\语音文件\\a1.pcm");
    }

    /**
     * Fetches an access token, uploads the PCM file and extracts the
     * recognised text from the response.
     *
     * @param testFileName path of the PCM file to recognise
     * @return the text between the first {@code ["} and {@code "]} of the
     *         response, with every {@code ,} replaced by {@code /}
     * @throws Exception if a request fails or the response contains no result
     */
    public static String beginSample(String testFileName) throws Exception {
        getToken();
        String word = extractResult(method2(testFileName));
        System.out.println(word);
        return word;
    }

    /**
     * Cuts the recognised text out of the raw response.
     *
     * @throws IOException if the response has no bracketed, quoted result
     *                     (for example an error response)
     */
    private static String extractResult(String response) throws IOException {
        int open = response.indexOf("[");
        int close = response.indexOf("]");
        // Need at least ["..."]: skip `["` after open and drop `"` before close.
        if (open < 0 || close - 1 < open + 2) {
            throw new IOException("Speech API response contains no recognition result: " + response);
        }
        return response.substring(open + 2, close - 1).replace(',', '/');
    }

    /** Requests an OAuth access token and stores it in {@link #token}. */
    private static void getToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials"
            + "&client_id=" + apiKey + "&client_secret=" + secretKey;
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        token = new JSONObject(printResponse(conn)).getString("access_token");
    }

    /** Sends the audio as a base64 field inside a JSON request body. */
    private static void method1(String testFileName) throws Exception {
        File pcmFile = new File(testFileName);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();

        // construct params
        JSONObject params = new JSONObject();
        params.put("format", "pcm");
        params.put("rate", 8000);
        params.put("channel", "1");
        params.put("token", token);
        params.put("cuid", cuid);
        params.put("len", pcmFile.length());
        params.put("speech", DatatypeConverter.printBase64Binary(loadFile(pcmFile)));

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");

        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request; encode explicitly as UTF-8 to match the declared
        // charset (writeBytes would drop the high byte of each char)
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(params.toString().getBytes("utf-8"));
            wr.flush();
        }

        printResponse(conn);
    }

    /**
     * Sends the raw PCM bytes as the request body, with cuid and token in the
     * query string.
     *
     * @return the raw response text
     */
    private static String method2(String testFileName) throws Exception {
        File pcmFile = new File(testFileName);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL
                + "?cuid=" + cuid + "&token=" + token).openConnection();

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "audio/pcm; rate=8000");

        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(loadFile(pcmFile));
            wr.flush();
        }
        return printResponse(conn);
    }

    /**
     * Reads the response body, pretty-prints it as JSON to stdout and returns
     * it (each line followed by {@code '\r'}).
     *
     * @throws IOException if the HTTP status is not 200
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        int status = conn.getResponseCode();
        if (status != 200) {
            // Report the host only: the full URL carries the secret / token.
            throw new IOException("Unexpected HTTP status " + status + " from " + conn.getURL().getHost());
        }
        StringBuilder response = new StringBuilder();
        try (BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"))) {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        }
        // NOTE(review): this also prints the token response to stdout.
        System.out.println(new JSONObject(response.toString()).toString(4));
        return response.toString();
    }

    /**
     * Reads a whole file into memory.
     *
     * @throws IOException if the file is too large for a byte array or could
     *                     not be read completely
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large to load: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];

        try (InputStream is = new FileInputStream(file)) {
            int offset = 0;
            int numRead;
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
        }
        return bytes;
    }
}

SSM实现语音识别的内容就是这些，实现后你就可以在html中实现语音识别啦！路径问题大家要注意跟自己的项目结构进行调整，这是我本项目的项目路径结构：

转载地址：http://yeshn.baihongyu.com/

你可能感兴趣的文章