|
- import torch
- from tqdm import tqdm
- from multiprocessing import Pool
- from mel_processing import spectrogram_torch, mel_spectrogram_torch
- from utils import load_wav_to_torch
-
-
class AudioProcessor:
    """Compute spectrograms for audio files and cache them next to the audio.

    The constructor only stores its arguments; the work happens in
    :meth:`process_audio`.

    Args:
        max_wav_value: Divisor applied to the loaded waveform.
        use_mel_spec_posterior: If true, spectrograms are produced by
            ``mel_spectrogram_torch`` and cached as ``*.mel.pt``; otherwise
            ``spectrogram_torch`` is used and the cache is ``*.spec.pt``.
        filter_length: Forwarded to the spectrogram helpers.
        n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
        sampling_rate: Forwarded to the spectrogram helpers.
        hop_length: Forwarded to the spectrogram helpers.
        win_length: Forwarded to the spectrogram helpers.
        mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
        mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_path(self, filename):
        """Return the cache file path that belongs to ``filename``."""
        suffix = ".mel.pt" if self.use_mel_spec_posterior else ".spec.pt"
        # Only a trailing ".wav" is swapped out.  A plain str.replace would
        # also rewrite ".wav" inside directory names, and for a file without
        # that extension it would return the input path itself, so the cache
        # write would overwrite the source audio.
        wav_ext = ".wav"
        stem = filename[: -len(wav_ext)] if filename.endswith(wav_ext) else filename
        return stem + suffix

    def _compute_spec(self, audio_norm):
        """Compute the spectrogram of ``audio_norm`` and squeeze dimension 0."""
        if self.use_mel_spec_posterior:
            spec = mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        else:
            spec = spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                center=False,
            )
        return torch.squeeze(spec, 0)

    def process_audio(self, filename):
        """Return the spectrogram and normalized waveform for ``filename``.

        A cached spectrogram is loaded when one can be read; otherwise it is
        computed and written to the cache path.

        Args:
            filename: Path of the audio file, as a ``str``.

        Returns:
            A tuple ``(spec, audio_norm)``. ``audio_norm`` is the loaded
            waveform divided by ``max_wav_value`` with a leading dimension
            added via ``unsqueeze(0)``.
        """
        # NOTE(review): the sample rate returned by the loader is not compared
        # with self.sampling_rate; confirm the inputs are already resampled.
        audio, _ = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        spec_filename = self._spec_path(filename)
        try:
            spec = torch.load(spec_filename)
        except Exception:
            # Best effort: a missing or unreadable cache entry is rebuilt.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt and SystemExit abort the run.
            spec = self._compute_spec(audio_norm)
            torch.save(spec, spec_filename)
        return spec, audio_norm
-
-
# Example usage: a module-level processor configured with fixed settings.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # None instead of the string "null": a string cannot serve as a frequency
    # bound.  Presumably the value was copied from a JSON config where it is
    # `null` -- confirm.  It is only read when use_mel_spec_posterior is True.
    mel_fmax=None,
)
-
def main():
    """Build the spectrogram cache for every audio file in the training list.

    Reads ``filelists/train.list``, takes the text before the first ``|`` on
    each non-blank line as an audio path, and runs
    ``processor.process_audio`` on the paths in a process pool while showing
    a progress bar.
    """
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        # The first "|"-separated field is the audio path.  Stripping drops
        # the newline kept by lines without a separator; blank lines are
        # skipped so they do not become empty paths.
        filepaths = [line.split("|")[0].strip() for line in f if line.strip()]

    # Process the files with a pool of 32 worker processes.
    with Pool(processes=32) as pool:
        results = pool.imap_unordered(processor.process_audio, filepaths)
        # Results are discarded; iterating drives the pool and the bar.
        for _ in tqdm(results, total=len(filepaths)):
            pass


# Guard the entry point: with the "spawn" start method each worker re-imports
# this module, and an unguarded Pool creation would run again in every child.
if __name__ == "__main__":
    main()
|