I. Enable the Alibaba Cloud TTS service
Log in to Alibaba Cloud and choose Products → Artificial Intelligence → Speech Synthesis.
Click "Apply for activation" and create a project in the management console.
Copy the token and the appkey.
II. Integrate with the speech synthesis API
Because the SDK pulls in many third-party jar packages, it is recommended to call the RESTful API directly instead.
Copy the demo code from the interface documentation, paste in your token and appkey, and it runs as-is. The demo generates a syAudio.wav file, which can be played with any media player.
According to the documentation, two jar packages need to be added to the project:
<!-- OkHttp: HTTP client used by the TTS utility class -->
<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp</artifactId>
    <version>3.9.1</version>
</dependency>
<!-- http://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.42</version>
</dependency>
Voice generation tool class:
import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.alibaba.fastjson.JSONObject; import com.hsoft.commutil.props.PropertiesUtil; import okhttp3.MediaType; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.RequestBody; import okhttp3.Response; public class SpeechRestfulUtil { private static Logger logger = LoggerFactory.getLogger(SpeechRestfulUtil.class); private String accessToken; private String appkey; private static SpeechRestfulUtil instance; private static SpeechRestfulUtil getInstance() { if (instance == null) { synchronized (SpeechRestfulUtil.class) { if (instance == null) { String appkey = PropertiesUtil.getProperty("aliyun.voice.appkey"); String token = PropertiesUtil.getProperty("aliyun.voice.token"); instance = new SpeechRestfulUtil(appkey, token); } } } return instance; } private SpeechRestfulUtil(String appkey, String token) { this.appkey = appkey; this.accessToken = token; } /** * HTTPS GET request */ private byte[] processGETRequet(String text, String format, int sampleRate) { /** * Set HTTPS GET request * 1.Use HTTPS protocol * 2.Domain name of speech recognition service: nls-gateway.cn-shanghai.aliyuncs.com * 3.Voice recognition interface request path / stream/v1/tts * 4.Set the required parameters: appkey, token, text, format, sample rate. 
* 5.Set optional request parameters: voice, volume, speech rate, pitch rate */ String url = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts"; url = url + "?appkey=" + appkey; url = url + "&token=" + accessToken; url = url + "&text=" + text; url = url + "&format=" + format; url = url + "&sample_rate=" + String.valueOf(sampleRate); // voice pronunciation person, optional, default is xiaoyun // url = url + "&voice=" + "xiaoyun"; // Volume volume, range 0-100, optional, default 50 // url = url + "&volume=" + String.valueOf(50); // Speech rate, range is - 500 ~ 500, optional, default is 0 url = url + "&speech_rate=" + String.valueOf(100); // pitch_rate tone, range is - 500 ~ 500, optional, default is 0 // url = url + "&pitch_rate=" + String.valueOf(0); // System.out.println("URL: " + url); /** * Send HTTPS GET request and process the response of the server */ Request request = new Request.Builder() .url(url) .get() .build(); byte[] bytes=null; try { OkHttpClient client = new OkHttpClient(); Response response = client.newCall(request).execute(); String contentType = response.header("Content-Type"); if ("audio/mpeg".equals(contentType)) { bytes =response.body().bytes(); // File f = new File(audioSaveFile); // FileOutputStream fout = new FileOutputStream(f); // fout.write(response.body().bytes()); // fout.close(); // System.out.println(f.getAbsolutePath()); logger.info("The GET SpeechRestful succeed!"); } else { // ContentType is null or "application/json" String errorMessage = response.body().string(); logger.info("The GET SpeechRestful failed: " + errorMessage); } response.close(); } catch (Exception e) { logger.error("processGETRequet",e); } return bytes; } /** * HTTPS POST request */ private byte[] processPOSTRequest(String text, String audioSaveFile, String format, int sampleRate) { /** * Set HTTPS POST request * 1.Use HTTPS protocol * 2.Domain name of voice synthesis service: nls-gateway.cn-shanghai.aliyuncs.com * 3.Voice synthesis interface request path 
/ stream/v1/tts * 4.Set the required parameters: appkey, token, text, format, sample rate. * 5.Set optional request parameters: voice, volume, speech rate, pitch rate */ String url = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts"; JSONObject taskObject = new JSONObject(); taskObject.put("appkey", appkey); taskObject.put("token", accessToken); taskObject.put("text", text); taskObject.put("format", format); taskObject.put("sample_rate", sampleRate); // voice pronunciation person, optional, default is xiaoyun // taskObject.put("voice", "xiaoyun"); // Volume volume, range 0-100, optional, default 50 // taskObject.put("volume", 50); // Speech rate, range is - 500 ~ 500, optional, default is 0 // taskObject.put("speech_rate", 0); // pitch_rate tone, range is - 500 ~ 500, optional, default is 0 // taskObject.put("pitch_rate", 0); String bodyContent = taskObject.toJSONString(); // System.out.println("POST Body Content: " + bodyContent); RequestBody reqBody = RequestBody.create(MediaType.parse("application/json"), bodyContent); Request request = new Request.Builder() .url(url) .header("Content-Type", "application/json") .post(reqBody) .build(); byte[] bytes=null; try { OkHttpClient client = new OkHttpClient(); Response response = client.newCall(request).execute(); String contentType = response.header("Content-Type"); if ("audio/mpeg".equals(contentType)) { bytes = response.body().bytes(); logger.info("The POST SpeechRestful succeed!"); } else { // ContentType is null or "application/json" String errorMessage = response.body().string(); logger.info("The POST SpeechRestful failed: " + errorMessage); } response.close(); } catch (Exception e) { logger.error("processPOSTRequest",e); } return bytes; } public static byte[] text2voice(String text) { if (StringUtils.isBlank(text)) { return null; } SpeechRestfulUtil demo = SpeechRestfulUtil.getInstance(); // String text = "member collection: 87.12 yuan"; // urlencode with RFC 3986 String textUrlEncode = text; try { 
textUrlEncode = URLEncoder.encode(textUrlEncode, "UTF-8") .replace("+", "%20") .replace("*", "%2A") .replace("%7E", "~"); } catch (UnsupportedEncodingException e) { logger.error("encode",e); } // String audioSaveFile = "syAudio.wav"; String format = "wav"; int sampleRate = 16000; return demo.processGETRequet(textUrlEncode, format, sampleRate); } }
III. Integrate WebSocket
Of course, our goal is not to obtain an audio file but to hear the sound directly on the web page.
To achieve this, we introduce WebSocket, push the audio bytes straight to the page, and then use a FileReader object to play them.
1. Add the jar package
<!-- Spring WebSocket support; exclude conflicting logging/validation transitives -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-websocket</artifactId>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>log4j-over-slf4j</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-validator</artifactId>
        </exclusion>
    </exclusions>
</dependency>
2. Create a Websocket processing class
public class VoiceHandler extends AbstractWebSocketHandler { private static final Logger logger = LoggerFactory.getLogger(VoiceHandler.class); @Override public void afterConnectionEstablished(WebSocketSession session) throws Exception { VoicePool.add(session); } @Override public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception { VoicePool.remove(session); } @Override protected void handleTextMessage(WebSocketSession session, TextMessage message) throws Exception { logger.debug("receive Msg :" + message.getPayload()); TextMessage msg=new TextMessage(message.getPayload()); session.sendMessage(msg); } }
3. Create websocket connection pool management class
public class VoicePool { private static final Logger logger = LoggerFactory.getLogger(VoicePool.class); private static Map<String, WebSocketSession> pool = new ConcurrentHashMap<String, WebSocketSession>(); private static Map<Long, List<String>> userMap = new ConcurrentHashMap<Long, List<String>>(); private static final ExecutorService threadPool = Executors.newFixedThreadPool(50); public static void add(WebSocketSession inbound) { pool.put(inbound.getId(), inbound); Map<String, String> map = ParamUtil.parser(inbound.getUri().getQuery()); Long companyId = Long.valueOf(map.get("companyId")); logger.info("add companyId:{}", companyId); List<String> lstInBound = null; if (companyId != null) { lstInBound = userMap.get(companyId); if (lstInBound == null) { lstInBound = new ArrayList<String>(); userMap.put(companyId, lstInBound); } lstInBound.add(inbound.getId()); } logger.info("add connetion {},total size {}", inbound.getId(), pool.size()); } public static void remove(WebSocketSession socket) { String sessionId = socket.getId(); List<String> lstInBound = null; Map<String, String> map = ParamUtil.parser(socket.getUri().getQuery()); Long companyId = Long.valueOf(map.get("companyId")); logger.info("remove companyId:{}", companyId); if (StringUtils.isNotBlank(sessionId)) { if (companyId != null) { lstInBound = userMap.get(companyId); if (lstInBound != null) { lstInBound.remove(sessionId); if (lstInBound.isEmpty()) { userMap.remove(companyId); } } } } pool.remove(sessionId); logger.info("remove connetion {},total size {}", sessionId, pool.size()); } /** Push information */ public static void broadcast(VoiceMsgVo vo) { Long companyId = vo.getCompanyId(); if (companyId == null || companyId == 0L) { return; } List<String> lstInBoundId = userMap.get(companyId); if (lstInBoundId == null || lstInBoundId.isEmpty()) { return; } byte[] bytes = SpeechRestfulUtil.text2voice(vo.getText()); if (bytes == null) { return; } threadPool.execute(() -> { try { logger.info("send to 
companyId:{}", companyId); for (String id : lstInBoundId) { // Send to specified user WebSocketSession connection = pool.get(id); if (connection != null) { BinaryMessage msg = new BinaryMessage(bytes); connection.sendMessage(msg); } } } catch (Exception e) { logger.error("broadcast error: companyId:{}", companyId, e); } }); } }
Message object bean
/**
 * Message pushed through the voice WebSocket channel: the text to synthesize
 * and the company whose sessions should receive the audio.
 *
 * <p>Callers (VoicePool.broadcast and the test snippet) use getters/setters
 * that the original bean did not declare — presumably Lombok was elided from
 * the listing — so the accessors are written out explicitly here.
 */
public class VoiceMsgVo {

    // Text to convert to speech.
    private String text;
    // Target company; broadcast() ignores the message when null or 0.
    private Long companyId;

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public Long getCompanyId() {
        return companyId;
    }

    public void setCompanyId(Long companyId) {
        this.companyId = companyId;
    }
}
4. Websocket configuration
/**
 * Registers the voice WebSocket endpoint at /ws/voice.
 */
@Configuration
@EnableWebSocket
public class WebSocketConfig implements WebSocketConfigurer {

    /** Expose the voice push handler as a Spring bean. */
    @Bean
    public VoiceHandler voiceHandler() {
        return new VoiceHandler();
    }

    /** Map the handler to /ws/voice and accept connections from any origin. */
    @Override
    public void registerWebSocketHandlers(WebSocketHandlerRegistry registry) {
        registry.addHandler(voiceHandler(), "/ws/voice").setAllowedOrigins("*");
    }
}
5. Front end js processing
Create any page, and introduce the following js
// Client side of the voice push channel: connects to /ws/voice, plays binary
// audio frames via the Web Audio API, and keeps the socket alive with an
// application-level heartbeat (server echoes text frames back).
var audioContext = new (window.AudioContext || window.webkitAudioContext)();
var Chat = {};
Chat.socket = null;
Chat.connect = (function(host) {
    if ("WebSocket" in window) {
        Chat.socket = new WebSocket(host);
    } else if ("MozWebSocket" in window) {
        Chat.socket = new MozWebSocket(host);
    } else {
        Console.log("Error: WebSocket is not supported by this browser.");
        return;
    }
    Chat.socket.onopen = function() {
        Console.log("Info: Voice broadcast started.");
        // Heartbeat detection reset
        heartCheck.reset().start(Chat.socket);
    };
    Chat.socket.onclose = function() {
        Console.log("Info: Voice broadcast is off.");
    };
    Chat.socket.onmessage = function(message) {
        // Any frame proves the connection is alive: restart the heartbeat timer.
        heartCheck.reset().start(Chat.socket);
        if (message.data == null || message.data == '' || "HeartBeat" == message.data){
            //Heartbeat message
            return;
        }
        // Binary frames arrive as a Blob (default ws binaryType); read it into
        // an ArrayBuffer, decode to PCM, and play through the audio context.
        var reader = new FileReader();
        reader.onload = function(evt) {
            if (evt.target.readyState == FileReader.DONE) {
                audioContext.decodeAudioData(evt.target.result, function(buffer) {
                    // Decode to pcm stream
                    var audioBufferSouceNode = audioContext
                            .createBufferSource();
                    audioBufferSouceNode.buffer = buffer;
                    audioBufferSouceNode
                            .connect(audioContext.destination);
                    audioBufferSouceNode.start(0);
                }, function(e) {
                    console.log(e);
                });
            }
        };
        reader.readAsArrayBuffer(message.data);
    };
});
Chat.initialize = function() {
    // NOTE(review): _currCompanyId must be defined globally by the hosting
    // page before this script runs — confirm against the page template.
    Chat.companyId = _currCompanyId;
    // Match the ws scheme to the page scheme (ws for http, wss for https).
    if (window.location.protocol == "http:") {
        Chat.connect("ws://" + window.location.host + "/ws/voice?companyId="+Chat.companyId);
    } else {
        Chat.connect("wss://" + window.location.host + "/ws/voice?companyId="+Chat.companyId);
    }
};
// Sends the contents of the #chat input over the socket (debug helper).
Chat.sendMessage = (function() {
    var message = document.getElementById("chat").value;
    if (message != "") {
        Chat.socket.send(message);
        document.getElementById("chat").value = "";
    }
});
// Minimal on-page logger: appends to a #console element when present,
// otherwise falls back to the browser console.
var Console = {};
Console.log = (function(message) {
    var _console=document.getElementById("console");
    if (_console==null || _console==undefined){
        console.log(message);
        return;
    }
    var p=document.createElement("p");
    p.style.wordWrap="break-word";
    p.innerHTML=message;
    _console.appendChild(p);
    // Keep at most 25 log lines and stay scrolled to the bottom.
    while(_console.childNodes.length>25) {
        _console.removeChild(_console.firstChild);
    }
    _console.scrollTop=_console.scrollHeight;
});
// Safe to call before heartCheck is assigned below: `var heartCheck` is
// hoisted, and the socket callbacks that read it only fire asynchronously.
Chat.initialize();
//Heartbeat detection
var heartCheck = {
    timeout : 60000,// 60 seconds
    timeoutObj : null,
    serverTimeoutObj : null,
    reset : function() {
        clearTimeout(this.timeoutObj);
        clearTimeout(this.serverTimeoutObj);
        return this;
    },
    start : function(ws) {
        var self = this;
        this.timeoutObj = setTimeout(function() {
            // A heartbeat is sent here. When the backend receives it, it returns a heartbeat message.
            // onmessage gets the returned heartbeat, which means the connection is normal.
            // console.log('start heartCheck');
            ws.send("HeartBeat");
            self.serverTimeoutObj = setTimeout(function() {// If it has not been reset for a certain period of time, it means that the backend is disconnected actively.
                ws.close();// If onclose will execute reconnect, we can execute ws.close(). If we directly execute reconnect
                // It will trigger onclose and cause reconnection twice.
            }, self.timeout)
        }, this.timeout)
    }
}
IV. Start the project and test
Start the project and send a message from the backend:
// Build a test message and push it to every session registered for company 1.
VoiceMsgVo vo = new VoiceMsgVo();
vo.setCompanyId(1L);
vo.setText("What a nice day today! I went out for a walk");
VoicePool.broadcast(vo);