1234567891011121314151617181920212223242526272829303132333435 |
- import os
- import librosa
- import numpy as np
- from scipy.io import wavfile
- from tqdm import tqdm
def _scale_to_int16(wav, max_wav_value):
    """Peak-normalize *wav* to ``max_wav_value`` and return it as int16.

    Args:
        wav: NumPy array of audio samples.
        max_wav_value: Amplitude the largest absolute sample is scaled to.

    Returns:
        A ``np.int16`` array with the same shape as *wav*.
    """
    # An empty clip has no peak; treat it like silence instead of raising.
    peak = np.max(np.abs(wav)) if wav.size else 0.0
    # Silent audio (peak == 0) is left untouched: dividing by zero would
    # fill the array with NaNs, which then cast to garbage int16 samples.
    if peak > 0:
        wav = wav / peak * max_wav_value
    # Clip before casting: a positive peak scaled to e.g. 32768 does not fit
    # in int16 and would otherwise wrap around to a negative sample.
    int16_range = np.iinfo(np.int16)
    return np.clip(wav, int16_range.min, int16_range.max).astype(np.int16)


def prepare_align(config):
    """Resample the corpus audio and write per-utterance ``.lab`` transcripts.

    For each of the ``train`` and ``test`` subsets, reads
    ``<corpus_path>/<subset>/content.txt`` (one ``<wav_name>\\t<text>`` entry
    per line). For every entry whose wav file exists under
    ``<corpus_path>/<subset>/<speaker>/``, writes to ``<raw_path>/<speaker>/``:

    * the audio, loaded at the configured sampling rate, peak-normalized and
      stored as 16-bit PCM under the same file name;
    * a ``.lab`` file containing every second space-separated token of the
      text (the tokens at odd positions), joined by single spaces.

    Entries whose wav file is missing are skipped silently, as are blank
    lines in ``content.txt``.

    Args:
        config: Nested mapping providing ``config["path"]["corpus_path"]``,
            ``config["path"]["raw_path"]``,
            ``config["preprocessing"]["audio"]["sampling_rate"]`` and
            ``config["preprocessing"]["audio"]["max_wav_value"]``.

    Raises:
        ValueError: If a non-blank line of ``content.txt`` does not consist
            of exactly two tab-separated fields.
        OSError: If ``content.txt`` cannot be opened or an output file cannot
            be written.
    """
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]

    for dataset in ["train", "test"]:
        print("Processing {}ing set...".format(dataset))
        content_path = os.path.join(in_dir, dataset, "content.txt")
        with open(content_path, encoding="utf-8") as f:
            for line in tqdm(f):
                line = line.strip("\n")
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue

                wav_name, text = line.split("\t")
                # NOTE(review): presumably the first 7 characters of the file
                # name are the speaker ID - confirm against the corpus layout.
                speaker = wav_name[:7]
                wav_path = os.path.join(in_dir, dataset, speaker, wav_name)
                if not os.path.exists(wav_path):
                    continue

                # Keep only the tokens at odd positions of the transcript.
                tokens = text.split(" ")[1::2]

                speaker_dir = os.path.join(out_dir, speaker)
                os.makedirs(speaker_dir, exist_ok=True)

                # `sr` must be passed by keyword: recent librosa releases
                # reject it as a positional argument.
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                wavfile.write(
                    os.path.join(speaker_dir, wav_name),
                    sampling_rate,
                    _scale_to_int16(wav, max_wav_value),
                )

                # NOTE(review): the 11-character stem assumes fixed-length
                # utterance names - confirm against the corpus naming scheme.
                lab_path = os.path.join(
                    speaker_dir, "{}.lab".format(wav_name[:11])
                )
                # Write with the same encoding content.txt was read with, so
                # the result does not depend on the platform default.
                with open(lab_path, "w", encoding="utf-8") as f1:
                    f1.write(" ".join(tokens))
|