# author: kdd
# date:
"""
### audio_editor
Audio editing: split audio on silence, remove long silent stretches inside speech,
trim leading and trailing silence, set the sample rate, and set the channel count.
Also converts between audio formats, e.g. wav to mp3.
Splitting, silence removal, and trimming all take wav input and return wav output.
The editing functions build on pydub and add support for numpy signal data.
"""
import io
import logging
from pathlib import Path

import numpy as np
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

from .audio_io import anything2bytesio, _sr, _int16_max

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(Path(__file__).stem)


def convert_channels(wav, sr=_sr, value=1):
    """Set the channel count of a wav signal (default 1, i.e. mono)."""
    aud = wav2audiosegment(wav, sr=sr)
    aud = aud.set_channels(channels=value)
    wav = audiosegment2wav(aud)
    return wav


def convert_sample_rate(wav, sr=_sr, value=_sr):
    """Resample a wav signal from sr to the target sample rate."""
    aud = wav2audiosegment(wav, sr=sr)
    aud = aud.set_frame_rate(frame_rate=value)
    wav = audiosegment2wav(aud)
    return wav


def convert_sample_width(wav, sr=_sr, value=4):
    """Set the sample width (bytes per sample) of a wav signal."""
    aud = wav2audiosegment(wav, sr=sr)
    aud = aud.set_sample_width(sample_width=value)
    wav = audiosegment2wav(aud)
    return wav


def convert_format(wav, sr=_sr, format='mp3'):
    """
    Convert a wav signal into the bytes of the given audio format.
    :param wav: audio signal as a numpy array.
    :param sr: sample rate of the input signal.
    :param format: target format name, e.g. 'mp3'.
    :return: encoded audio as bytes.
    """
    aud = wav2audiosegment(wav, sr=sr)
    out = io.BytesIO()
    aud.export(out, format=format)
    return out.getvalue()


def convert_format_os(inpath, outpath, out_format='mp3', in_format=None):
    """
    Convert an audio file on disk to another format.
    :param inpath: path of the input audio file.
    :param outpath: path of the output audio file.
    :param out_format: target format name, e.g. 'mp3'.
    :param in_format: input format name; if None, pydub infers it.
    :return:
    """
    src = AudioSegment.from_file(inpath, format=in_format)
    src.export(outpath, format=out_format)
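

# Usage sketch for the conversion helpers (the signal name `wav` and the file
# names below are illustrative assumptions; mp3 export also requires ffmpeg to
# be available to pydub):
#
#     mono = convert_channels(wav, sr=_sr, value=1)             # force mono
#     wav16k = convert_sample_rate(wav, sr=_sr, value=16000)    # resample to 16 kHz
#     mp3_bytes = convert_format(wav, sr=_sr, format='mp3')     # encode to mp3 bytes
#     convert_format_os('speech.wav', 'speech.mp3', out_format='mp3')  # file to file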


def audiosegment2wav(data: AudioSegment):
    """
    Convert a pydub.AudioSegment into a wav signal (float numpy array).
    :param data: pydub.AudioSegment instance.
    :return: audio signal scaled to [-1, 1], assuming 16-bit samples.
    """
    wav = np.array(data.get_array_of_samples()) / _int16_max
    return wav


def wav2audiosegment(wav: np.ndarray, sr):
    """
    Convert a wav signal (numpy array) into a pydub.AudioSegment.
    :param wav: audio signal as a numpy array.
    :param sr: sample rate of the signal.
    :return: pydub.AudioSegment instance.
    """
    tmp = anything2bytesio(wav, sr=sr)
    out = AudioSegment.from_wav(tmp)
    return out
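

# Round-trip sketch: signals are floats in [-1, 1]; audiosegment2wav assumes
# 16-bit samples because it rescales by _int16_max. `wav` below is an assumed
# mono numpy array at the module's default sample rate:
#
#     segment = wav2audiosegment(wav, sr=_sr)   # numpy array -> pydub.AudioSegment
#     wav_back = audiosegment2wav(segment)      # back to a float numpy array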


def strip_silence_wave(wav: np.ndarray, sr=_sr, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """
    Trim leading and trailing silence from a wav signal.
    :param wav: audio signal as a numpy array.
    :param sr: sample rate of the signal.
    :param keep_silence_len: milliseconds of silence to keep at each end.
    :param min_silence_len: minimum length in milliseconds for a stretch to count as silence.
    :param silence_thresh: silence threshold in dBFS.
    :param kwargs: extra arguments passed through to strip_audio.
    :return: trimmed audio signal.
    """
    data = wav2audiosegment(wav, sr=sr)
    out = strip_audio(data,
                      keep_silence_len=keep_silence_len,
                      min_silence_len=min_silence_len,
                      silence_thresh=silence_thresh,
                      **kwargs)
    out = audiosegment2wav(out)
    return out


def strip_audio(data: AudioSegment, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """Trim leading and trailing silence from an AudioSegment."""
    nsils = detect_nonsilent(data, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    if len(nsils) >= 1:
        return data[max(0, nsils[0][0] - keep_silence_len): min(len(data), nsils[-1][1] + keep_silence_len)]
    else:
        return AudioSegment.empty()


def strip_audio_os(inpath, outpath, **kwargs):
    """Trim leading and trailing silence of an audio file and write the result to outpath."""
    try:
        data = AudioSegment.from_file(inpath, kwargs.get('format', 'wav'))
        out = strip_audio(data, **kwargs)
        out.export(outpath, kwargs.get('format', 'wav'))
    except Exception as e:
        logger.error('Error path: {}'.format(inpath))
        logger.error('Error info: {}'.format(e))
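

# Trimming sketch: stretches quieter than -32 dBFS and longer than 100 ms count
# as silence, and 20 ms of it is kept at each end. `wav` and the file names are
# illustrative assumptions:
#
#     trimmed = strip_silence_wave(wav, sr=_sr, keep_silence_len=20,
#                                  min_silence_len=100, silence_thresh=-32)
#     strip_audio_os('noisy.wav', 'trimmed.wav')   # same operation on files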


def split_silence_wave(wav, sr=_sr, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """
    Split a wav signal on its silent stretches.
    :param wav: audio signal as a numpy array.
    :param sr: sample rate of the signal.
    :param keep_silence_len: milliseconds of silence to keep around each segment.
    :param min_silence_len: minimum length in milliseconds for a stretch to count as silence.
    :param silence_thresh: silence threshold in dBFS.
    :param kwargs: extra arguments passed through to split_audio.
    :return: list of audio signals, one per non-silent segment.
    """
    data = wav2audiosegment(wav, sr=sr)
    outs = split_audio(data,
                       keep_silence_len=keep_silence_len,
                       min_silence_len=min_silence_len,
                       silence_thresh=silence_thresh,
                       **kwargs)
    out_wavs = []
    for out in outs:
        wav = audiosegment2wav(out)
        out_wavs.append(wav)
    return out_wavs


def split_audio(data: AudioSegment, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """Split an AudioSegment on its silent stretches."""
    nsils = detect_nonsilent(data, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    if len(nsils) >= 1:
        outs = []
        for ab in nsils:
            out = data[max(0, ab[0] - keep_silence_len): min(len(data), ab[1] + keep_silence_len)]
            outs.append(out)
    else:
        outs = [AudioSegment.empty()]
    return outs
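

# Splitting sketch: every detected non-silent stretch becomes its own signal,
# padded with up to keep_silence_len ms of the surrounding silence. `wav` is an
# assumed numpy signal:
#
#     pieces = split_silence_wave(wav, sr=_sr, min_silence_len=100, silence_thresh=-32)
#     for i, piece in enumerate(pieces):
#         print(i, len(piece) / _sr, 'seconds')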


def remove_silence_wave(wav, sr=_sr, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """
    Remove silent stretches inside a wav signal.
    :param wav: audio signal as a numpy array.
    :param sr: sample rate of the signal.
    :param keep_silence_len: milliseconds of silence to keep around each non-silent segment.
    :param min_silence_len: minimum length in milliseconds for a stretch to count as silence.
    :param silence_thresh: silence threshold in dBFS.
    :param kwargs: extra arguments passed through to remove_silence_audio.
    :return: audio signal with the silent stretches removed.
    """
    data = wav2audiosegment(wav, sr=sr)
    out = remove_silence_audio(data,
                               keep_silence_len=keep_silence_len,
                               min_silence_len=min_silence_len,
                               silence_thresh=silence_thresh,
                               **kwargs)
    out = audiosegment2wav(out)
    return out


def remove_silence_audio(data: AudioSegment, keep_silence_len=20, min_silence_len=100, silence_thresh=-32, **kwargs):
    """Concatenate the non-silent segments of an AudioSegment, dropping the silence between them."""
    nsils = detect_nonsilent(data, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    out = AudioSegment.empty()
    sf = 0
    for ab in nsils:
        si = max(ab[0] - keep_silence_len, sf)  # do not re-append audio already kept
        ei = ab[1] + keep_silence_len
        out = out + data[si: ei]
        sf = ei
    return out
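

# Internal-silence sketch: unlike strip_silence_wave, this also drops long pauses
# in the middle of the signal, keeping keep_silence_len ms around each non-silent
# part. `wav` is an assumed numpy signal:
#
#     compact = remove_silence_wave(wav, sr=_sr, min_silence_len=100, silence_thresh=-32)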


if __name__ == "__main__":
    print(__file__)
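    # Minimal self-test sketch. Assumptions: the module is run within its package
    # (e.g. `python -m <package>.audio_editor`) so the relative import resolves,
    # and audio_io's anything2bytesio accepts a float array and writes 16-bit wav.
    n = int(_sr)  # samples per second (assumes _sr is a numeric sample rate)
    tone = 0.3 * np.sin(2 * np.pi * 440 * np.arange(n) / n)  # 1 s of a 440 Hz tone
    demo = np.concatenate([np.zeros(n), tone, np.zeros(n)])  # 1 s of silence each side
    trimmed = strip_silence_wave(demo, sr=_sr)
    logger.info('demo: %.2f s -> trimmed: %.2f s', len(demo) / n, len(trimmed) / n)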