目录

静音设置:

音量设置:

音量检测速度优化版:

pydub音量检测

平均音量

pyloudnorm音量检测


静音设置:

pactl set-sink-mute alsa_output.pci-0000_00_14.0.analog-stereo 0
作用:设置是否静音

0 = 解除静音

1 = 静音

音量设置:

pactl set-sink-volume alsa_output.pci-0000_00_14.0.analog-stereo 50%

# 增加主音量15%
amixer sset Master 15%+

# 增加PCM设备音量15%
amixer sset PCM 15%+

耳机和外放的听感不一样:

耳机没有环境杂音,音量小也能听清;外放时周围有环境噪声,音量小了就听不清。

另外,参考对象不同,感受也不同:如果以音量较大的音频作为参考,音量小的音频就会显得格外小。

音量检测速度优化版:



import glob
import os
import shutil
import time
import soundfile as sf
import numpy as np

def rms_dbfs_numpy(samples: np.ndarray, full_scale: float = 2 ** 15) -> float:
    """Return the RMS level of ``samples`` in dBFS.

    The level is ``20 * log10(rms / full_scale)``, so a signal whose RMS
    equals ``full_scale`` reads 0 dBFS.

    :param samples: audio samples; every element (all channels included) is
        pooled into a single RMS value.
    :param full_scale: amplitude that maps to 0 dBFS. The default ``2**15``
        matches int16 PCM; pass ``1.0`` for float audio scaled to [-1, 1].
    :return: the level in dBFS as a plain Python float, or ``-100.0`` for
        empty or all-zero input.
    """
    samples = np.asarray(samples)
    if samples.size == 0:
        return -100.0
    # Widen before squaring: int16 would overflow, and float64 keeps the mean
    # accurate on long windows where float32 accumulation loses precision.
    rms = np.sqrt(np.mean(samples.astype(np.float64) ** 2))
    if rms == 0:
        return -100.0
    return float(20 * np.log10(rms / full_scale))

def detect_loud(file: str, threshold: float=-42.0, step_sec: float=1) -> "tuple[bool, float]":
    """
    Check whether any window of an audio file is louder than a threshold.

    The file is read as int16 and scanned in consecutive windows of
    ``step_sec`` seconds. Scanning stops at the first window whose RMS level
    exceeds ``threshold``.

    :param file: path of the audio file
    :param threshold: dBFS level a window must exceed to count as loud
    :param step_sec: window length in seconds; fractional values are allowed,
        and a window shorter than one sample is clamped to one sample
    :return: (loud window found, loudest window level seen before returning).
        Because of the early exit the second value is the maximum over the
        scanned part only, not necessarily over the whole file. An unreadable
        file yields ``(False, -100.0)``.
    """
    try:
        data, sr = sf.read(file, dtype='int16')
    except Exception as e:
        # Best effort: report the unreadable file and treat it as silent.
        print(f"[跳过] {file}, 错误: {e}")
        return False, -100.0

    # range() needs a positive integer step; sr * step_sec is a float for
    # fractional windows and would be 0 for step_sec == 0.
    step = max(1, int(sr * step_sec))
    max_fb = -100.0

    for i in range(0, len(data), step):
        rms_val = rms_dbfs_numpy(data[i:i + step])
        max_fb = max(max_fb, rms_val)
        if rms_val > threshold:
            return True, max_fb  # stop at the first loud window
    return False, max_fb

if __name__ == "__main__":
    # Source folder of MP3 files and destination folder for the quiet ones.
    dir_a = r"/Users/lbg/Documents/data/volume_low"
    files = glob.glob(dir_a + "/*.mp3")

    low_dir = "/Users/lbg/Documents/data/volume_low32/"
    os.makedirs(low_dir, exist_ok=True)

    threshold = -40.0
    step_sec = 1

    ok_files = []
    small_num = 0
    for file in files:
        start = time.time()
        ok, max_fb = detect_loud(file, threshold=threshold, step_sec=step_sec)
        print('time', time.time() - start)

        # Flip the sign so the file-name prefix is a positive number.
        max_fb = -max_fb

        if ok:
            ok_files.append(file)
            continue

        # No window exceeded the threshold: copy the file, prefixing its name
        # with the (negated) loudest level that was measured.
        small_num += 1
        stem = os.path.basename(file)[:-4]
        shutil.copy(file, f"{low_dir}{max_fb:.0f}_{stem}.mp3")
        print(small_num, 'small', file)

pydub音量检测



import glob
import time
from pydub import AudioSegment
import numpy as np
import os
import shutil

def rms_dbfs(segment):
    """Return the RMS level of a pydub-style audio segment in dBFS.

    :param segment: object exposing ``get_array_of_samples()`` and, optionally,
        ``sample_width`` (bytes per sample).
    :return: ``20 * log10(rms / full_scale)``, or ``-100.0`` when the segment
        has no samples or is entirely zero.
    """
    samples = np.asarray(segment.get_array_of_samples(), dtype=np.float64)
    if samples.size == 0:
        # The mean of an empty array is NaN; report the silence floor instead.
        return -100.0
    rms = np.sqrt(np.mean(samples ** 2))
    if rms == 0:
        return -100.0
    # Full scale depends on the sample width (2 bytes -> 2**15). Fall back to
    # 16-bit when the object does not say.
    sample_width = getattr(segment, "sample_width", 2)
    full_scale = float(2 ** (8 * sample_width - 1))
    return 20 * np.log10(rms / full_scale)

# Scan every MP3 in dir_a with pydub and copy the files that never exceed
# -42 dBFS in any one-second window into low_dir.
dir_a = r"/Users/lbg/Documents/data/volume_low"
files = glob.glob(dir_a + "/*.mp3")

low_dir = "/Users/lbg/Documents/data/volume_low31/"
os.makedirs(low_dir, exist_ok=True)

ok_paths = []
small_num = 0
for file in files:
    start = time.time()
    try:
        audio = AudioSegment.from_file(file)
    except Exception as e:
        # Could not load or decode the file: report and move on.
        print(e)
        continue

    step = 1000  # window length in milliseconds
    max_fb = -100
    ok_ = 0
    for offset in range(0, len(audio), step):
        rms_val = rms_dbfs(audio[offset:offset + step])
        max_fb = max(rms_val, max_fb)
        # Author's rule of thumb: normal speech sits above about -20 to
        # -25 dBFS; the cut-off applied here is -42.
        if rms_val > -42:
            ok_ = 1
            ok_paths.append(file)
            break
    print('time', time.time() - start, len(audio))

    # Flip the sign so the file-name prefix is a positive number.
    max_fb = -max_fb
    if ok_ != 0:
        continue

    small_num += 1
    stem = os.path.basename(file)[:-4]
    shutil.copy(file, f"{low_dir}{max_fb:.0f}_{stem}.mp3")
    print(small_num, 'small', file)

平均音量

import glob
import os
import shutil
import time
import soundfile as sf
import numpy as np

import os
import shutil

from natsort import natsorted

def rms_dbfs_numpy(samples: np.ndarray, full_scale: float = 2 ** 15) -> float:
    """Return the RMS level of ``samples`` in dBFS.

    The level is ``20 * log10(rms / full_scale)``, so a signal whose RMS
    equals ``full_scale`` reads 0 dBFS.

    :param samples: audio samples; every element (all channels included) is
        pooled into a single RMS value.
    :param full_scale: amplitude that maps to 0 dBFS. The default ``2**15``
        matches int16 PCM; pass ``1.0`` for float audio scaled to [-1, 1].
    :return: the level in dBFS as a plain Python float, or ``-100.0`` for
        empty or all-zero input.
    """
    samples = np.asarray(samples)
    if samples.size == 0:
        return -100.0
    # Widen before squaring: int16 would overflow, and float64 keeps the mean
    # accurate on long windows where float32 accumulation loses precision.
    rms = np.sqrt(np.mean(samples.astype(np.float64) ** 2))
    if rms == 0:
        return -100.0
    return float(20 * np.log10(rms / full_scale))


# def rms_dbfs_numpy(data):
#     """计算音频段的RMS值(以dBFS表示)"""
#     data = data.astype(np.float32)
#     rms = np.sqrt(np.mean(data ** 2))
#     return 20 * np.log10(rms) if rms > 0 else -100.0


def get_audio_levels(data, step):
    """
    Return the average and the maximum window level of an audio buffer.

    ``data`` is cut into consecutive windows of ``step`` items. Each window is
    scored with ``rms_dbfs_numpy``, and the result is
    ``(mean of the window levels, highest window level)``. When no window is
    processed both values are -100.0.
    """
    peak_db = -100.0
    db_sum = 0.0
    n_windows = 0

    for offset in range(0, len(data), step):
        window = data[offset:offset + step]
        if len(window) == 0:
            break
        level = rms_dbfs_numpy(window)
        if level > peak_db:
            peak_db = level
        db_sum += level
        n_windows += 1

    if n_windows > 0:
        return db_sum / n_windows, peak_db
    return -100.0, peak_db

def detect_loud(file, step_sec: float = 0.1) -> "tuple[float, float]":
    """Measure the average and maximum window level of an audio file.

    The file is read as int16 and handed to ``get_audio_levels`` with a window
    of ``step_sec`` seconds.

    :param file: path of the audio file
    :param step_sec: analysis window length in seconds (default 0.1)
    :return: (average dBFS, maximum dBFS); ``(-100.0, -100.0)`` when the file
        cannot be read
    """
    try:
        data, sr = sf.read(file, dtype='int16')
    except Exception as e:
        print(f"[跳过] {file}, 错误: {e}")
        # Keep both slots numeric. Returning False for the average would be
        # formatted by callers as a level of 0 dBFS.
        return -100.0, -100.0

    # Convert seconds to a whole number of samples; never let it reach 0,
    # which range() inside get_audio_levels would reject.
    step = max(1, int(sr * step_sec))
    return get_audio_levels(data, step)


if __name__ == "__main__":

    dir_a = r"B:\data\audio\music\label\music_no"
    # dir_a = r"B:\data\audio\music\label\music_ok"

    # Collect audio files only. The earlier ('wav', 'pkl', 'jpeg') filter also
    # picked up pickle and image files, which cannot be decoded as audio and
    # were then copied with placeholder levels.
    audio_exts = ('.wav',)
    img_files = [
        '%s/%s' % (root.replace("\\", "/"), name)
        for root, _dirs, names in os.walk(dir_a)
        for name in names
        if name.lower().endswith(audio_exts)
    ]

    low_dir = "volume_low32_no/"
    os.makedirs(low_dir, exist_ok=True)
    small_num = 0
    img_files = natsorted(img_files)
    for file in img_files:
        start = time.time()
        avg_volume, max_volume = detect_loud(file)

        print('time', time.time() - start)
        small_num += 1
        # Prefix the copy with its average and maximum level, and keep the
        # real extension instead of relabelling every file as ".mp3".
        stem, ext = os.path.splitext(os.path.basename(file))
        shutil.copy(file, low_dir + f"{avg_volume:.0f}_{max_volume:.0f}_{stem}{ext}")
        print(small_num, 'small', file)

pyloudnorm音量检测

import glob
import os
import shutil
import numpy as np
from pydub import AudioSegment
import pyloudnorm as pyln

import time
# Measure the integrated loudness (LUFS) of every MP3 in dir_a and copy the
# quiet ones into low_dir, prefixed with the measured value.
dir_a = r"/Users/lbg/Documents/data/volume_low"
files = glob.glob(dir_a + "/*.mp3")

low_dir = "/Users/lbg/Documents/data/volume_low4/"
os.makedirs(low_dir, exist_ok=True)

# Files whose integrated loudness is below this many LUFS count as too quiet.
loudness_threshold = -30

for file in files:
    try:
        start = time.time()
        audio = AudioSegment.from_file(file)

        # Scale to [-1, 1) using the segment's own sample width rather than
        # assuming 16-bit audio.
        full_scale = float(2 ** (8 * audio.sample_width - 1))
        samples = np.array(audio.get_array_of_samples()).astype(np.float32) / full_scale

        # pydub returns multi-channel audio as one interleaved 1-D array.
        # The meter expects shape (samples, channels), otherwise it would
        # treat a stereo file as a mono signal of twice the length.
        if audio.channels > 1:
            samples = samples.reshape((-1, audio.channels))

        # The meter must be built with the audio's own sample rate.
        meter = pyln.Meter(audio.frame_rate)
        loudness = meter.integrated_loudness(samples)
        print('net time', time.time() - start)

        if loudness < loudness_threshold:
            shutil.copy(file, os.path.join(low_dir, f"{loudness:.1f}_" + os.path.basename(file)))

    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print("Error:", file, e)

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐