|
- """
- 在线推理任务配置:
- 集群:智算GPU
- 资源:
- 镜像:xtts
- 模型:tts
- 启动脚本: tts_api.py
- """
-
from c2net.context import prepare

# Initialize the platform context: pulls the configured datasets and
# pretrained models into the container (c2net is the platform SDK).
c2net_context = prepare()

# Dataset paths: downloadable speaker sample WAVs and the speaker
# conditioning data actually fed to the model.
speaker_sample_dir = c2net_context.dataset_path + "/xtts_speakers/samples"
speaker_data_dir = c2net_context.dataset_path + "/xtts_speakers/data"

# Pretrained XTTS-v2 checkpoint directory.
xtts_v2_path = c2net_context.pretrain_model_path + "/XTTS-v2"

# All outputs must be written under the platform-provided output directory.
you_should_save_here = c2net_context.output_path
# Where an uploaded reference voice is stored, and where synthesis results land.
user_voice_path = f"{you_should_save_here}/upload_speaker.wav"
output_wav_path = f"{you_should_save_here}/output_audio.wav"
-
-
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.utils.audio.numpy_transforms import save_wav

# Load the XTTS-v2 model once at import time so every request reuses it.
# (Fixed: the two prints were f-strings with no placeholders.)
print("start loading xtts model...")
config = XttsConfig()
config.load_json(f"{xtts_v2_path}/config.json")
model = Xtts.init_from_config(config)
# eval=True loads the checkpoint in inference mode.
model.load_checkpoint(config, checkpoint_dir=xtts_v2_path, eval=True)
model.cuda()  # requires a CUDA-capable GPU in the serving container
print("finish loading xtts model!")
-
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Annotated, Any, Optional

import numpy
import uvicorn
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from typing_extensions import Annotated
-
app = FastAPI()
# Route prefix injected by the Octopus notebook proxy. Default to "" so the
# module still imports when the env var is unset (os.getenv would otherwise
# return None and @app.get(None) raises at import time); with "" the
# prefixed routes below resolve to plain "/speaker/...", "/tts/...".
base_url = os.getenv("OCTOPUS_NOTEBOOK_BASE_URL", "")
-
-
@app.get(base_url)
async def home():
    """Health-check endpoint: reports that the server is up."""
    status_payload = {"server": "running"}
    return status_payload
-
-
def synthesize_voice_output(text: str, lang: str, speaker_wav_path: str) -> list:
    """Run XTTS synthesis for *text* in language *lang*, cloning the voice
    in the WAV at *speaker_wav_path*, and return the raw waveform samples
    (the "wav" entry of the model's output dict)."""
    synthesis_result = model.synthesize(
        text,
        config,
        speaker_wav=speaker_wav_path,
        gpt_cond_len=3,  # length of the speaker-conditioning segment — see XTTS docs
        language=lang,
    )
    return synthesis_result["wav"]
-
-
def list_speaker_samples_id():
    """Return the stem (filename without extension) of every file in the
    speaker-sample directory, skipping macOS ``.DS_Store`` artifacts."""
    sample_root = Path(speaker_sample_dir)
    speaker_ids = []
    for entry in sample_root.glob("*"):
        if not entry.is_file():
            continue
        if entry.stem == ".DS_Store":
            continue
        speaker_ids.append(entry.stem)
    return speaker_ids
-
-
def save_bytes_audio(bytes_audio_data: bytes, save_path: str) -> None:
    """Write raw audio bytes to *save_path*, replacing any existing file.

    Args:
        bytes_audio_data: complete audio payload (e.g. an uploaded WAV).
        save_path: destination file path.
    """
    # Path.write_bytes opens in "wb", which already truncates an existing
    # file — the explicit unlink() in the old version was redundant.
    Path(save_path).write_bytes(bytes_audio_data)
    print(f"saved bytes audio data at {save_path}")
-
-
def save_np_audio(np_audio_data: numpy.ndarray, save_path: str):
    """Persist a synthesized waveform to *save_path* as a 24 kHz WAV,
    replacing any file already at that path."""
    target = Path(save_path)
    target.unlink(missing_ok=True)

    save_wav(wav=np_audio_data, path=save_path, sample_rate=24000)
    print(f"saved np audio data at {save_path}")
-
-
@dataclass
class TTSResponse:
    """Envelope for JSON API responses.

    Attributes:
        code: status code; 1 on success (only value emitted today).
        msg: human-readable status message.
        data: optional payload. /speaker/list puts a list of speaker ids
            here, so the annotation is Any rather than the old (wrong) str.
    """

    code: int
    msg: str
    data: Optional[Any] = None
-
-
@app.get(f"{base_url}/speaker/list")
async def list_speakers():
    """Return the ids of all built-in speaker samples."""
    speaker_ids = list_speaker_samples_id()
    return TTSResponse(code=1, msg="success", data=speaker_ids)
-
-
@app.get(f"{base_url}/speaker/get_sample")
async def download_wav_file(speaker_id: str):
    """Stream the reference WAV for *speaker_id* back to the client.

    *speaker_id* arrives from the query string (untrusted); reduce it to
    its final path component so values like "../../etc/passwd" cannot
    escape the sample directory. Benign ids are unaffected.
    """
    safe_id = Path(speaker_id).name
    wav_path = f"{speaker_sample_dir}/{safe_id}.wav"
    return FileResponse(wav_path, media_type="audio/wav", filename=f"{safe_id}.wav")
-
-
@app.post(f"{base_url}/tts/synthesize")
async def synthesize(
    text: str = Form(...),
    lang: str = Form(...),
    speaker_id: str = Form(...),
    wav_data: UploadFile = File(default=None),
):
    """Synthesize *text* in language *lang* and return it as a WAV download.

    If a reference WAV is uploaded, its voice is cloned; otherwise the
    built-in speaker identified by *speaker_id* is used. On failure the
    error text is returned with HTTP 200 (legacy behavior, kept for
    existing clients).
    """
    try:
        if wav_data is not None:  # fixed: was `!= None` (PEP 8: identity test)
            save_bytes_audio(wav_data.file.read(), user_voice_path)
            speaker_wav_path = user_voice_path
        else:
            # speaker_id is untrusted form data: keep only the final path
            # component so it cannot escape the speaker data directory.
            speaker_wav_path = f"{speaker_data_dir}/{Path(speaker_id).name}.wav"

        output_audio = synthesize_voice_output(text, lang, speaker_wav_path)
        save_np_audio(output_audio, output_wav_path)

        # Fixed download-filename typo: was "otuput.wav".
        return FileResponse(output_wav_path, media_type="audio/wav", filename="output.wav")

    except Exception as e:
        # NOTE(review): returning str(e) with HTTP 200 hides failures from
        # clients and may leak internals; consider HTTPException(500).
        # Kept as-is here to avoid breaking existing consumers.
        return str(e)
-
-
if __name__ == "__main__":
    # Serve on all interfaces on port 8888 (the port exposed by the platform).
    uvicorn.run(app, host="0.0.0.0", port=8888)
|