尝试调用钉钉ASR接口
音频转换
FFmpeg
一、FFmpeg 安装教程(Windows 系统)
下载地址:Download FFmpeg(https://ffmpeg.org/download.html)
-
下载安装包
访问 FFmpeg 官网,点击 Windows builds from gyan.dev。
在 Release Builds 区域选择最新版本(如 ffmpeg-7.0.2-essentials_build.zip):
Essentials:基础功能版(推荐)。
Full:完整功能版(需更多存储空间)。 -
解压文件
解压下载的压缩包到非系统盘(如 D:\Programs\ffmpeg),避免占用 C 盘空间。
解压后的目录结构:
├── bin # 核心可执行文件(ffmpeg.exe、ffplay.exe、ffprobe.exe)
├── doc # 文档
└── presets # 预设编码方案 -
配置环境变量
右键 此电脑 → 属性 → 高级系统设置 → 环境变量。
在 系统变量 中找到 Path → 点击 编辑 → 新建 → 输入 D:\Programs\ffmpeg\bin(替换为你的实际路径)。
连续点击 确定 保存设置。 -
验证安装
打开命令行窗口(Win+R → 输入 cmd),执行:
ffmpeg -version
若显示版本信息(如 ffmpeg version 7.0.2),则安装成功
二、FFmpeg 基础使用教程
-
常用命令格式
ffmpeg [全局参数] -i [输入文件] [输出参数] [输出文件] -
基础功能示例
-
视频转码(MP4 → MKV)
ffmpeg -i input.mp4 -c:v libx264 -c:a aac output.mkv -
调整分辨率(1080p → 480p)
ffmpeg -i input.mp4 -vf scale=854:480 output.mp4 -
提取音频
ffmpeg -i input.mp4 -vn -c:a copy output.aac
-vn:忽略视频流。
剪切视频片段(截取 00:01:00 到 00:02:30)
ffmpeg -i input.mp4 -ss 00:01:00 -to 00:02:30 -c:v copy -c:a copy output.mp4
参数说明(以上文的转码命令为例)
ffmpeg -i input.mp4 -c:v libx264 -c:a aac output.mkv
-c:v:视频编码器(如 libx265 为 H.265)。
-c:a:音频编码器(如 mp3)。
Java代码实现
<dependency>
<groupId>ws.schild</groupId>
<artifactId>jave-all-deps</artifactId>
<version>3.0.1</version>
</dependency>
/**
 * Transcodes {@code source} to WAV and writes the result to {@code target}.
 *
 * @param source audio file to read
 * @param target file the encoder writes the WAV output to
 * @return {@code target}, for call chaining
 * @throws EncoderException if the encoder fails
 */
public File convertAudioFormat(File source, File target)
        throws EncoderException {
    // Audio stream settings: only the codec is specified for the WAV output.
    AudioAttributes wavAudio = new AudioAttributes();
    wavAudio.setCodec("pcm_s16le");

    // Container settings: WAV output carrying the audio stream configured above.
    EncodingAttributes wavEncoding = new EncodingAttributes();
    wavEncoding.setAudioAttributes(wavAudio);
    wavEncoding.setOutputFormat("wav");

    new Encoder().encode(new MultimediaObject(source), target, wavEncoding);
    System.out.println("转换完成!");
    return target;
}
asr接口
后端
/**
 * Stores an uploaded DingTalk voice recording, converts it from OGG to AMR,
 * uploads the AMR file to DingTalk and runs speech recognition (ASR) on it.
 *
 * @param file the recording received from the client
 * @return on success, the ASR response JSON extended with {@code filePath} (the AMR file)
 *         and {@code media_id}; otherwise an error result carrying the failure message
 * @throws RuntimeException wrapping the {@code EncoderException} when the conversion fails
 */
public AjaxResult uploadMedia(MultipartFile file) {
    // Store the original (not yet converted) recording through the file service.
    Map<String, Object> map = new HashMap<>();
    map.put("companyName", SecurityUtils.getCompany());
    String jsonString = JSONObject.toJSONString(map);
    R<SysFile> sysFileR = remoteFileService.uploadAudio(file, jsonString);
    if (sysFileR == null || sysFileR.getData() == null || sysFileR.getData().getUrl() == null) {
        return AjaxResult.error("文件上传失败");
    }
    String oriFilePath = sysFileR.getData().getUrl();
    System.out.println(oriFilePath);

    // Convert the OGG recording to AMR, then remove the OGG original.
    String filePath = toAmrPath(oriFilePath);
    File inputFile = new File(oriFilePath);
    File outputFile = new File(filePath);
    try {
        convertAudioFormat(inputFile, outputFile);
        // Best-effort cleanup: a leftover OGG file does not affect the result.
        inputFile.delete();
    } catch (EncoderException e) {
        throw new RuntimeException(e);
    }

    // Upload the AMR file to DingTalk and run ASR on the returned media id.
    try {
        String body = uploadMediaToDingTalk(filePath);
        JSONObject uploadResult = body == null ? null : JSONObject.parseObject(body);
        String mediaId = uploadResult == null
                ? ""
                : Objects.toString(uploadResult.get("media_id"), "");
        if (mediaId.isEmpty()) {
            System.out.println(body);
            return AjaxResult.error("文件上传失败");
        }

        JSONObject asrResult = JSONObject.parseObject(asrResponse(mediaId));
        if (asrResult == null) {
            return AjaxResult.error("语音识别失败");
        }
        // The ASR outcome is decided by the ASR response's own errcode,
        // not by the errcode of the preceding media upload.
        if ("0".equals(asrResult.getString("errcode"))) {
            asrResult.put("filePath", filePath);
            asrResult.put("media_id", mediaId);
            return AjaxResult.success(asrResult);
        }
        return AjaxResult.error(asrResult.getString("errmsg"));
    } catch (Exception e) {
        return AjaxResult.error(e.getMessage());
    }
}

/**
 * Derives the AMR output path from the stored recording path.
 *
 * <p>Only a trailing {@code .ogg} extension is swapped, so an "ogg" elsewhere in the
 * path is left alone. A path without that extension gets {@code .amr} appended, which
 * keeps the output distinct from the input file that is deleted after conversion.
 */
private static String toAmrPath(String path) {
    String oggSuffix = ".ogg";
    int stemLength = path.length() - oggSuffix.length();
    if (path.regionMatches(true, stemLength, oggSuffix, 0, oggSuffix.length())) {
        return path.substring(0, stemLength) + ".amr";
    }
    return path + ".amr";
}
上传录音文件(上传录音文件,我们需要使用multipart/form-data POST上传文件)
/** Upper bound on re-uploads after DingTalk reports the access token as expired. */
private static final int MAX_TOKEN_RETRIES = 3;

/**
 * Uploads a voice file to DingTalk as a {@code multipart/form-data} POST.
 *
 * <p>The access token is read from Redis when cached, otherwise fetched through
 * {@code DingTalkTool.getToken()}. When DingTalk answers with the token-timeout error
 * code, the cached token is dropped and the upload is retried after a 3 second pause,
 * at most {@link #MAX_TOKEN_RETRIES} times.
 *
 * @param fileName path of the local file to upload
 * @return the raw JSON response body, or {@code null} when the request failed
 * @throws RuntimeException if no access token can be obtained
 */
private String uploadMediaToDingTalk(String fileName) {
    return uploadMediaToDingTalk(fileName, MAX_TOKEN_RETRIES);
}

/** Performs one upload attempt; {@code retriesLeft} bounds the token-expired retries. */
private String uploadMediaToDingTalk(String fileName, int retriesLeft) {
    String accessToken;
    if (redisService.hasKey(CacheConstants.DT_ACCESS_TOKEN)) {
        accessToken = redisService.getCacheObject(CacheConstants.DT_ACCESS_TOKEN);
    } else {
        try {
            accessToken = DingTalkTool.getToken();
        } catch (Exception e) {
            // Keep the original failure as the cause so the stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    // Raw Map kept on purpose: the parameter type of getRequestUrl is not visible here.
    Map map = new HashMap<>();
    map.put("access_token", accessToken);

    // Build the multipart request.
    HttpEntity entity = MultipartEntityBuilder.create()
            .addTextBody("type", "voice")
            .addBinaryBody("media", new File(fileName))
            .build();
    HttpPost httpPost = new HttpPost(
            SimpleHttpClientUtil.getRequestUrl(DtConstant.BASE_URL, DtConstant.GET_UPLOAD, map));
    httpPost.setEntity(entity);

    boolean tokenRejected = false;
    HttpClient httpClient = HttpClients.createDefault();
    try {
        HttpResponse httpResponse = httpClient.execute(httpPost);
        if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
            String jsonStr = EntityUtils.toString(httpResponse.getEntity(), Consts.UTF_8);
            JSONObject resultObj = JSONObject.parseObject(jsonStr);
            if (resultObj != null) {
                Integer errcode = resultObj.getInteger("errcode");
                if (errcode != null
                        && errcode.intValue() == DtConstant.ERRCODE_TOKEN_TIMEOUT.intValue()) {
                    // Token expired: drop it from Redis so the retry obtains a fresh one.
                    System.out.println("post()==>token无效");
                    redisService.deleteObject(CacheConstants.DT_ACCESS_TOKEN);
                    tokenRejected = true;
                } else {
                    // Request went through: extend the cached token's lifetime.
                    // The token value itself is deliberately not printed.
                    System.out.println("post()==>token续期");
                    redisService.expire(CacheConstants.DT_ACCESS_TOKEN, 7000L, TimeUnit.SECONDS);
                    System.out.println("post()==>返回结果:" + jsonStr);
                    return jsonStr;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        String msg = MessageFormat.format("post()==>出现异常: {0}", e.getMessage());
        System.out.println(msg);
    } finally {
        closeHttpClientQuietly(httpClient);
    }

    if (tokenRejected && retriesLeft > 0) {
        System.out.println("post()==>延迟3秒");
        try {
            Thread.sleep(3000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and give up instead of retrying.
            Thread.currentThread().interrupt();
            return null;
        }
        System.out.println("post()==>重新发起请求");
        return uploadMediaToDingTalk(fileName, retriesLeft - 1);
    }

    System.out.println("post()==>请求发生了错误");
    return null;
}

/** Releases the client's resources when its implementation is closeable. */
private static void closeHttpClientQuietly(HttpClient client) {
    if (client instanceof java.io.Closeable) {
        try {
            ((java.io.Closeable) client).close();
        } catch (java.io.IOException ignored) {
            // Best-effort cleanup: nothing useful can be done if closing fails.
        }
    }
}
转换格式(使用dd.uploadFile上传的录音文件是OGG格式,上传媒体文件这个接口支持上传amr、mp3、wav格式,最终asr识别ogg 或 amr 格式,所以这里我们转换为amr格式)
/**
 * Transcodes {@code source} to AMR and writes the result to {@code target}.
 *
 * <p>The audio stream is encoded with the {@code libopencore_amrnb} codec using
 * one channel, an 8000 Hz sampling rate and a bit rate of 12200.
 *
 * @param source audio file to read
 * @param target file the encoder writes the AMR output to
 * @return {@code target}, for call chaining
 * @throws EncoderException if the encoder fails
 */
public File convertAudioFormat(File source, File target)
        throws EncoderException {
    AudioAttributes amrAudio = new AudioAttributes();
    amrAudio.setCodec("libopencore_amrnb");
    // Sampling rate: marked as the important setting by the original author.
    amrAudio.setSamplingRate(8000);
    // Channels: 1 = mono, 2 = stereo.
    amrAudio.setChannels(1);
    amrAudio.setBitRate(12200);

    EncodingAttributes amrEncoding = new EncodingAttributes();
    amrEncoding.setAudioAttributes(amrAudio);
    amrEncoding.setOutputFormat("amr");

    new Encoder().encode(new MultimediaObject(source), target, amrEncoding);
    System.out.println("转换完成!");
    return target;
}
前端
<template>
  <view>
    <view class="container">
      <!-- Message list: bot messages on the left, the user's own on the right -->
      <view class="message_content">
        <view class="chat-messages">
          <view v-for="(item, index) in messages" :key="item.id || index" :class="['message', item.sender === 'bot' ? 'receiver' : 'sender']">
            <!-- Voice bubble: pass the message itself so the handler can read item.content -->
            <view v-if="item.type === 'voice'" @click="speakVoice(item)">
              <view :class="item.sender === 'bot' ? 'voice-left' : 'voice-right'">
                <view class="circle circle-dot"></view>
                <view :class="['circle','circle1',item.sender === 'bot' ? 'clip1' : 'clip2',{'animate1':item.content.isSpeak}]"></view>
                <view :class="['circle','circle2',item.sender === 'bot' ? 'clip1' : 'clip2',{'animate2':item.content.isSpeak}]"></view>
                <view :class="['circle','circle3',item.sender === 'bot' ? 'clip1' : 'clip2',{'animate3':item.content.isSpeak}]"></view>
                <view :class="item.sender === 'bot' ? 'second-left' : 'second-right'" >{{ item.content.duration }}s</view>
              </view>
            </view>
            <view v-else-if="item.type === 'text'">
              <p>{{ item.content }}</p>
            </view>
          </view>
          <!-- Images or other media can be added here as <img> tags or extra <div> children -->
        </view>
      </view>
      <!-- Press-and-hold area: long press starts recording, releasing ends it -->
      <view class="input-bottom">
        <view class="chat-input" @touchstart="touchStart" @touchend="onEnd" @longpress="onStart" @touchmove="handleRecordMove">
          <button class="send-button" v-show="!isTalking" >按住说话</button>
        </view>
      </view>
    </view>
    <!-- Full-screen overlay shown while recording -->
    <view class="mic-layer-talking" v-if="isTalking">
      <view :class="{'mic-btn-talking':isTalking}">
        <view class="mic-btn-talking_text" v-show="isTalking">说话中....</view>
        <view class="tip-text" v-show="isTalking&&!sendLock"><text class="mr-10">松开</text>发送</view>
      </view>
      <view :class="['record-animate-box',{'active':sendLock}]">
        <view class="voice-scale">
          <view class="item" v-for="(item,index) in 10" :key="index"></view>
        </view>
      </view>
      <view :class="['record-cancel',{'active':sendLock}]">
        <!-- Replace the text below with your own icon if desired -->
        <text class="close-icon">x</text>
        <view class="tip-text" v-show="sendLock"><text class="mr-10">松开</text>取消</view>
      </view>
    </view>
  </view>
</template>
<script>
import {asr} from "@/api/system/asr"
import { getToken } from '@/utils/auth'
/**
 * Chat page with press-and-hold voice recording.
 *
 * Long-pressing the input area starts a recording through the DingTalk recorder
 * manager; releasing uploads the recorded file, and sliding the finger up before
 * releasing cancels the upload.
 */
export default {
  data() {
    return {
      recorderManager: null, // recorder manager obtained in onLoad
      isTalking: false,      // true while recording; toggles the recording overlay
      sendLock: false,       // true when the finger slid up far enough to cancel
      startPoint: {},        // touch position captured in touchStart
      index: 5,              // id counter for messages (the sample list ends at id 5)
      messages: [
        {
          id: 1,
          content: '你好!欢迎使用Vue聊天框。',
          sender: 'bot',
          type: 'text',
          timestamp: new Date(Date.now() - 60000)
        },
        {
          id: 2,
          content: '这是一个支持语音功能的聊天界面示例。',
          sender: 'bot',
          type: 'text',
          timestamp: new Date(Date.now() - 30000)
        },
        {
          id: 3,
          content: '点击右下角麦克风图标可以切换语音模式',
          sender: 'person',
          type: 'text',
          timestamp: new Date(Date.now() - 15000)
        },
        {
          id: 4,
          content: {mediaId: 1, duration: 60, isSpeak: false},
          sender: 'person',
          type: 'voice',
          timestamp: new Date(Date.now() - 15000)
        },
        {
          id: 5,
          content: {mediaId: 2, duration: 32, isSpeak: false},
          sender: 'bot',
          type: 'voice',
          timestamp: new Date(Date.now() - 15000)
        }
      ]
    }
  },
  onLoad() {
    this.recorderManager = dd.getRecorderManager();
    this.recorderManager.onerror = (err) => {
      console.log(err)
    };
  },
  methods: {
    /**
     * Touch start: remember where the finger went down.
     * @param {Object} e touch event
     */
    touchStart(e) {
      this.startPoint.clientX = e.changedTouches[0].clientX; // X coordinate at touch down
      this.startPoint.clientY = e.changedTouches[0].clientY; // Y coordinate at touch down
    },
    /**
     * Long press: start recording.
     */
    onStart() {
      this.isTalking = true;
      this.sendLock = false;
      // NOTE(review): values kept as in the original; confirm the expected type and
      // unit of `duration` against the DingTalk recorder manager documentation.
      const options = {
        duration: '60',
        sampleRate: 16000,
        numberOfChannels: 1,
        encodeBitRate: 96000,
        format: 'mp3'
      }
      this.recorderManager.start(options)
      console.log("開始錄音")
    },
    /**
     * Touch end: stop recording and upload the file unless the user cancelled.
     */
    onEnd() {
      // touchend also fires after a short tap that never triggered longpress;
      // in that case no recording is running and there is nothing to stop.
      if (!this.isTalking) {
        return;
      }
      // Capture the cancel state now; onStart resets sendLock for the next recording.
      const cancelled = this.sendLock;
      this.isTalking = false;
      this.recorderManager.onstop = (res) => {
        console.log(res)
        if (cancelled) {
          // The user slid up to cancel: discard the recording.
          return;
        }
        // Upload the recorded file.
        dd.uploadFile({
          url: 'http://xx.xx.xx.xx:8088/workaffairs/media/upload',
          header: {'Authorization': 'Bearer ' + getToken()},
          fileName: "file",
          filePath: res.tempFilePath,
          fileType: 'audio',
          formData: {},
          success: (uploadRes) => {
            console.log(uploadRes)
            // TODO: after a successful upload, append the voice message to the list:
            // const newData = data.data.data;
            // this.index += 1;
            // this.messages.push({
            //   id: this.index,
            //   content: {mediaId: newData.media_id, duration: res.duration, isSpeak: false},
            //   sender: 'person',
            //   type: 'voice',
            //   timestamp: new Date(Date.now() - res.duration)
            // })
          },
          fail: (err) => {
            // Surface upload failures instead of dropping them silently.
            console.log(err)
          },
          complete: () => {},
        });
      };
      this.recorderManager.stop();
    },
    /**
     * Finger moved while recording: sliding up by 45 or more arms the cancel state.
     * @param {Object} e touch event
     */
    handleRecordMove(e) {
      const touchData = e.touches[0]; // current finger position
      const moveY = touchData.clientY - this.startPoint.clientY;
      // Not far enough up => releasing still sends the recording.
      this.sendLock = moveY <= -45;
    },
    /**
     * Play a voice message (placeholder, playback is not implemented yet).
     * @param {Object} item message entry whose content describes the recording
     */
    speakVoice(item) {
      if (!item || !item.content) {
        return;
      }
      const content = item.content;
    }
  }
}
</script>
<style lang="scss">
/* Full-screen, semi-transparent overlay; the template renders it only while isTalking is true. */
.mic-layer-talking{
position: fixed;
width: 100%;
height: 100vh;
left: 0;
top: 0;
background-color: rgba(0, 0, 0, .6);
/* Bottom panel holding the status text */
.mic-btn-talking{
position: absolute;
bottom: 0;
width: 100vw;
height: 200rpx;
border-radius: 80px 80px 0 0;
background-color: #F2F2F2;
/* Compiles to .mic-btn-talking_text */
&_text{
color: #999999;
padding: 60rpx 0;
text-align: center;
}
}
/* Send / cancel hint text */
.tip-text{
position: absolute;
top: -60rpx;
left: 50%;
width: 160rpx;
text-align: center;
transform: translateX(-50%);
color: #E3E4EA;
font-size: 24rpx;
}
/* Voice animation box, centred in the overlay */
.record-animate-box{
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%,-50%);
width: 300rpx;
height: 140rpx;
background-color: #2878F4;
border-radius: 28rpx;
display: flex;
align-items: center;
justify-content: center;
transition: all .3s;
/* Applied through the template's sendLock binding: narrower and red */
&.active{
background-color: #f56c6c;
width: 140rpx;
}
/* Voice level bars */
.voice-scale {
width: 60%;
height: 40rpx;
display: flex;
align-items: center;
justify-content: space-between;
.item {
display: block;
background: #333333;
width: 5rpx;
height: 20%;
margin-right: 2.5px;
float: left;
&:last-child{
margin-right: 0px;
}
/* Each bar runs the same `load` animation with its own start delay */
&:nth-child(1) {
animation: load 1s 0.8s infinite linear;
}
&:nth-child(2) {
animation: load 1s 0.6s infinite linear;
}
&:nth-child(3) {
animation: load 1s 0.4s infinite linear;
}
&:nth-child(4) {
animation: load 1s 0.2s infinite linear;
}
&:nth-child(5) {
animation: load 1s 0s infinite linear;
}
&:nth-child(6) {
animation: load 1s 0.2s infinite linear;
}
&:nth-child(7) {
animation: load 1s 0.4s infinite linear;
}
&:nth-child(8) {
animation: load 1s 0.6s infinite linear;
}
&:nth-child(9) {
animation: load 1s 0.8s infinite linear;
}
&:nth-child(10) {
animation: load 1s 1s infinite linear;
}
}
}
}
/* Bar animation: fades in while growing from 20% to full height, then back */
@keyframes load {
0%,100%{
opacity: 0;
height: 20%;
}
50%{
opacity: 1;
height: 100%;
}
}
/* Cancel button */
.record-cancel{
position: absolute;
left: 50%;
bottom: 300rpx;
transform: translateX(-50%);
width: 100rpx;
height: 100rpx;
border-radius: 50%;
background-color: rgba(0, 0, 0, .2);
display: flex;
justify-content: center;
align-items: center;
transition: all .3s;
/* Applied through the template's sendLock binding: enlarged and red */
&.active{
transform: translateX(-50%) scale(1.2);
background-color: #f56c6c;
}
.close-icon{
font-size: 40rpx;
color: #FFFFFF;
}
}
}
/* Page layout: full-height container with the message list on top and the input bar at the bottom */
.container{
position: relative;
width: 100%;
height: 100vh;
}
/* Scrollable message area */
.message_content{
position: absolute;
width: 100%;
top: 0;
height: 80%;
overflow-y: auto;
}
/* Input bar pinned to the bottom of the container */
.input-bottom{
position: absolute;
bottom: 0;
height: 10%;
width: 100%;
text-align: center;
padding: 10px;
}
/* Chat area */
.chat-messages {
display: flex;
flex-direction: column;
padding: 10px;
}
.message {
max-width: 80%; /* cap the message width */
margin-bottom: 10px;
padding: 10px;
border-radius: 10px;
}
.sender {
align-self: flex-end; /* right-align messages from the sender */
background-color: #e0e0e0;
}
.receiver {
align-self: flex-start; /* left-align messages from the receiver */
background-color: #b3e5fc;
}
.chat-input {
display: flex;
justify-content: space-between;
padding: 10px;
background-color: #fff;
border-top: 1px solid #ccc;
}
/* .input-text {
flex-grow: 1;
font-size: 18px;
word-wrap:break-word;
padding: 8px;
border: 1px solid #007bff;
} */
/* "Hold to talk" button */
.send-button {
background-color: #007bff;
color: white;
width: 100%;
border: none;
cursor: pointer;
}
/* Voice message bubble */
.voice-right{
  position: relative;
  left:50%;
  min-width: 60px;
  min-height: 24px;
}
.voice-left{
  position: relative;
  right:50%;
  min-width: 60px;
  min-height: 24px;
}
/* Duration label placed beside the bubble */
.second-left{
  position: absolute;
  left:100%;
  bottom: 10%;
}
.second-right{
  position: absolute;
  right: 100%;
  bottom: 10%;
}
/* Shared base for the concentric rings, centred in the bubble */
.circle {
  position: absolute;
  border-radius: 50%;
  border:2px solid #000;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -50%);
}
/* .circle-dot {
  width: 2px;
  height: 2px;
  background-color: black;
} */
/* Ring sizes */
.circle1 {
  width: 12px;
  height: 12px;
  border-color: black;
}
.clip1 {
  clip-path: polygon(50% 50%, 100% 15%, 100% 85%); /* fine-tuned cut angle */
}
.clip2 {
  clip-path: polygon(50% 50%, 0% 15%, 0% 85%); /* fine-tuned cut angle */
}
.circle2 {
  width: 24px;
  height: 24px;
  border-color: black;
}
.circle3 {
  width: 36px;
  height: 36px;
  border-color: black;
}
/* Staggered ring animation; the template applies these classes when isSpeak is set */
.animate1 {
  animation: circleAnimate 1.6s infinite 0.333s;
}
.animate2 {
  animation: circleAnimate 1.6s infinite 0.7s;
}
.animate3 {
  animation: circleAnimate 1.6s infinite 1s;
}
@keyframes circleAnimate {
  0%,100%{
    opacity: 0;
  }
  50%{
    opacity: 1;
  }
}
</style>
前端这里我是在其他伟大的网友朋友的基础上完成的—程序员的基本美德
更多推荐
所有评论(0)