python 音量检测
python 音量检测
·
目录
静音设置:
pactl set-sink-mute alsa_output.pci-0000_00_14.0.analog-stereo 0
作用:设置是否静音
0 = 解除静音
1 = 静音
音量设置:
pactl set-sink-volume alsa_output.pci-0000_00_14.0.analog-stereo 50%
# 增加主音量15%
amixer sset Master 15%+
# 增加PCM设备音量15%
amixer sset PCM 15%+
耳机和外放的听感不一样:
耳机没有环境杂音,声音小也能听清;外放时有周围环境杂音,声音小了就听不到。
另外,参考基准不同,感受也不同:如果先听了音量大的音频,再听音量小的,音量偏小的感觉就会更明显。
音量检测速度优化版:
import glob
import os
import shutil
import time
import soundfile as sf
import numpy as np
def rms_dbfs_numpy(samples: np.ndarray) -> float:
    """Return the RMS level of ``samples`` in dBFS.

    The RMS is divided by ``2**15`` before conversion to decibels, i.e. the
    samples are treated as int16-range PCM. An empty input, or an RMS of
    exactly zero, yields the floor value ``-100.0``.
    """
    if len(samples) == 0:
        return -100.0

    # Square in float32 so that int16 inputs cannot overflow.
    as_float = samples.astype(np.float32)
    mean_square = np.mean(as_float ** 2)
    rms = np.sqrt(mean_square)
    if rms == 0:
        return -100.0

    full_scale = 2**15  # int16 full-scale reference
    return 20 * np.log10(rms / full_scale)
def detect_loud(file: str, threshold: float = -42.0, step_sec: int = 1) -> "tuple[bool, float]":
    """Report whether any window of an audio file is louder than ``threshold``.

    The file is read as int16 PCM with ``soundfile`` and scanned in
    consecutive windows of ``step_sec`` seconds. Scanning stops at the first
    window whose RMS level exceeds ``threshold``.

    :param file: path of the audio file.
    :param threshold: dBFS level a window must exceed to count as loud.
    :param step_sec: window length in seconds; must be positive, fractional
        values are accepted.
    :return: ``(is_loud, max_dbfs)`` where ``max_dbfs`` is the highest window
        level seen before returning. If the file cannot be read, a message
        is printed and ``(False, -100.0)`` is returned.
    :raises ValueError: if ``step_sec`` is not positive.
    """
    if step_sec <= 0:
        raise ValueError(f"step_sec must be positive, got {step_sec!r}")

    try:
        data, sr = sf.read(file, dtype='int16')
    except Exception as e:
        # Best-effort batch scan: report the unreadable file and move on.
        print(f"[跳过] {file}, 错误: {e}")
        return False, -100.0

    # range() needs an integer step of at least one sample; a fractional
    # step_sec would otherwise produce a float and raise TypeError.
    step = max(1, int(sr * step_sec))
    max_fb = -100.0
    for offset in range(0, len(data), step):
        rms_val = rms_dbfs_numpy(data[offset:offset + step])
        max_fb = max(max_fb, rms_val)
        if rms_val > threshold:
            # Return on the first loud window to avoid needless work.
            return True, max_fb
    return False, max_fb
if __name__ == "__main__":
    # Scan every MP3 in the source folder and copy the quiet ones (no window
    # above the threshold) into a separate folder. Each copy is prefixed
    # with its negated peak level.
    dir_a = r"/Users/lbg/Documents/data/volume_low"
    files = glob.glob(dir_a + "/*.mp3")
    low_dir = "/Users/lbg/Documents/data/volume_low32/"
    os.makedirs(low_dir, exist_ok=True)

    ok_files = []
    small_num = 0
    threshold = -40.0
    step_sec = 1

    for file in files:
        start = time.time()
        ok, max_fb = detect_loud(file, threshold=threshold, step_sec=step_sec)
        print('time', time.time() - start)

        # Negate the level so the file-name prefix is a positive number.
        max_fb = -max_fb
        if ok:
            ok_files.append(file)
        else:
            small_num += 1
            target = low_dir + f"{max_fb:.0f}_" + os.path.basename(file)[:-4] + f".mp3"
            shutil.copy(file, target)
            print(small_num, 'small', file)
pydub 音量检测
import glob
import time
from pydub import AudioSegment
import numpy as np
import os
import shutil
def rms_dbfs(segment):
    """Return the RMS level of an audio segment in dBFS.

    ``segment`` only needs a ``get_array_of_samples()`` method (as provided
    by pydub's ``AudioSegment``). The RMS is normalised against ``2**15``,
    so 16-bit samples are assumed.

    Returns ``-100.0`` for a segment with no samples or with an RMS of zero.
    """
    samples = np.array(segment.get_array_of_samples())
    # np.mean of an empty array is NaN (plus a RuntimeWarning); treat an
    # empty segment as silence, the same way the zero-RMS case is handled.
    if samples.size == 0:
        return -100.0
    rms = np.sqrt(np.mean(samples.astype(float) ** 2))
    if rms == 0:
        return -100.0
    return 20 * np.log10(rms / (2**15))  # normalise by int16 full scale
# Scan each MP3 in one-second windows with pydub and copy the files whose
# windows never exceed -42 dBFS into a separate folder.
dir_a = r"/Users/lbg/Documents/data/volume_low"
files = glob.glob(dir_a + "/*.mp3")
low_dir = "/Users/lbg/Documents/data/volume_low31/"
os.makedirs(low_dir, exist_ok=True)

ok_paths = []
small_num = 0
for file in files:
    start = time.time()
    try:
        audio = AudioSegment.from_file(file)
    except Exception as e:
        print(e)
        continue

    ok_ = 0
    max_fb = -100
    step = 1000  # window length in milliseconds
    for offset in range(0, len(audio), step):
        rms_val = rms_dbfs(audio[offset:offset + step])
        max_fb = max(rms_val, max_fb)
        # The original note says normal speech is empirically above about
        # -20 to -25 dBFS; the cut-off actually applied here is -42.
        if rms_val > -42:
            ok_ = 1
            ok_paths.append(file)
            break
    print('time', time.time() - start, len(audio))

    # Negate the level so the file-name prefix is a positive number.
    max_fb = -max_fb
    if not ok_:
        small_num += 1
        target = low_dir + f"{max_fb:.0f}_" + os.path.basename(file)[:-4] + f".mp3"
        shutil.copy(file, target)
        print(small_num, 'small', file)
平均音量
import glob
import os
import shutil
import time
import soundfile as sf
import numpy as np
import os
import shutil
from natsort import natsorted
def rms_dbfs_numpy(samples: np.ndarray) -> float:
    """Compute the root-mean-square level of an audio chunk, in dBFS.

    The RMS is referenced to ``2 ** 15`` (int16 full scale). The floor value
    ``-100.0`` is returned when the chunk is empty or its RMS is exactly
    zero.
    """
    silence_floor = -100.0
    if len(samples) > 0:
        # Work in float32: squaring raw int16 values would overflow.
        power = np.mean(samples.astype(np.float32) ** 2)
        rms = np.sqrt(power)
        if rms != 0:
            return 20 * np.log10(rms / (2 ** 15))
    return silence_floor
# def rms_dbfs_numpy(data):
# """计算音频段的RMS值(以dBFS表示)"""
# data = data.astype(np.float32)
# rms = np.sqrt(np.mean(data ** 2))
# return 20 * np.log10(rms) if rms > 0 else -100.0
def get_audio_levels(data, step):
    """Return the mean and the peak per-window RMS level of ``data``.

    ``data`` is cut into consecutive windows of ``step`` samples and each
    window is measured with ``rms_dbfs_numpy``. The mean is the arithmetic
    mean of the per-window dB values.

    Returns ``(avg_db, max_db)``; both are ``-100.0`` when no window was
    measured.
    """
    peak_db = -100.0
    db_sum = 0.0
    n_windows = 0
    for offset in range(0, len(data), step):
        window = data[offset:offset + step]
        if len(window) == 0:
            break
        level = rms_dbfs_numpy(window)
        peak_db = max(peak_db, level)
        db_sum += level
        n_windows += 1

    if n_windows > 0:
        return db_sum / n_windows, peak_db
    return -100.0, peak_db
def detect_loud(file) -> "tuple[float, float]":
    """Measure the average and peak windowed RMS level of an audio file.

    The file is read as int16 PCM with ``soundfile`` and analysed in
    consecutive 0.1-second windows by ``get_audio_levels``.

    :param file: path of the audio file.
    :return: ``(avg_dbfs, max_dbfs)``. If the file cannot be read, a message
        is printed and the floor value ``(-100.0, -100.0)`` is returned.
    """
    try:
        data, sr = sf.read(file, dtype='int16')
    except Exception as e:
        print(f"[跳过] {file}, 错误: {e}")
        # Both elements must be floats: a bool in the first slot would be
        # formatted by callers as a misleading "0" dBFS average.
        return -100.0, -100.0

    step_sec = 0.1  # analysis window length in seconds
    # Keep at least one sample per window so the range() step is never zero.
    step = max(1, int(sr * step_sec))
    return get_audio_levels(data, step)
if __name__ == "__main__":
    # Walk the source tree, measure every matching file and copy it into
    # low_dir with its average and peak level prefixed to the file name.
    dir_a = r"B:\data\audio\music\label\music_no"
    # dir_a = r"B:\data\audio\music\label\music_ok"

    # NOTE(review): besides 'wav' this also admits 'pkl' and 'jpeg' files,
    # which are presumably not decodable as audio - confirm the intent.
    wanted_suffixes = ('wav', 'pkl', 'jpeg')
    img_files = []
    for walk_entry in os.walk(dir_a):
        folder = walk_entry[0].replace("\\", "/")
        for fname in walk_entry[-1]:
            if fname.lower().endswith(wanted_suffixes):
                img_files.append('%s/%s' % (folder, fname))

    low_dir = "volume_low32_no/"
    os.makedirs(low_dir, exist_ok=True)
    ok_files = []
    small_num = 0
    img_files = natsorted(img_files)

    for file in img_files:
        start = time.time()
        avg_volume, max_volume = detect_loud(file)
        print('time', time.time() - start)

        small_num += 1
        # NOTE(review): the last four characters of the name are dropped and
        # ".mp3" is appended whatever the source type was - confirm this
        # renaming is intended.
        stem = os.path.basename(file)[:-4]
        target = low_dir + f"{avg_volume:.0f}_{max_volume:.0f}_" + stem + f".mp3"
        shutil.copy(file, target)
        print(small_num, 'small', file)
pyloudnorm音量检测
import glob
import os
import shutil
import numpy as np
from pydub import AudioSegment
import pyloudnorm as pyln
import time
# Measure the integrated loudness (LUFS) of every MP3 in dir_a and copy the
# files quieter than -30 LUFS into low_dir, prefixed with their loudness.
dir_a = r"/Users/lbg/Documents/data/volume_low"
files = glob.glob(dir_a + "/*.mp3")
low_dir = "/Users/lbg/Documents/data/volume_low4/"
os.makedirs(low_dir, exist_ok=True)

for file in files:
    # Best-effort batch run: any decode or measurement failure is reported
    # for that file and the loop continues.
    try:
        start = time.time()
        audio = AudioSegment.from_file(file)

        # Scale integer PCM to [-1, 1) using the actual sample width
        # instead of assuming 16-bit audio.
        full_scale = float(2 ** (8 * audio.sample_width - 1))
        samples = np.array(audio.get_array_of_samples()).astype(np.float32) / full_scale
        # pydub returns multi-channel audio interleaved in one flat array,
        # while pyloudnorm expects shape (n_samples, n_channels). Without
        # the reshape, stereo would be measured as a mono signal twice as long.
        samples = samples.reshape((-1, audio.channels))

        # The meter must be built with the audio's own sample rate.
        meter = pyln.Meter(audio.frame_rate)
        loudness = meter.integrated_loudness(samples)
        print('net time', time.time() - start)

        # Files below -30 LUFS are treated as too quiet.
        if loudness < -30:
            shutil.copy(file, os.path.join(low_dir, f"{loudness:.1f}_" + os.path.basename(file)))
    except Exception as e:
        print("Error:", file, e)
火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。
更多推荐
所有评论(0)