Skip to content

播报机器人----阅读失败 #288

@wecandoforit

Description

@wecandoforit

问题描述
版本:super-agent-party-win-v0.3.1
操作:机器人 → 播报机器人 → 粘贴测试文本“你好” → 点击“开始生成”
观察现象:进度条正常,vrm字幕显示正常,但是语音没有合成

TTS策略:
1. 本地使用 index-tts 1.5,正常对话时会生成语音,但播报机器人这里没有生成语音
2. 播报时发现下面的 /tts 接口没有被调用
Image

希望解决:
能否给出对应修改位置

接口内容如下:
@app.get("/tts")
async def synthesize(
    text: str = "欢迎使用语音合成接口",
    infer_mode: str = "普通推理",
    max_text_tokens_per_sentence: int = 120,
    sentences_bucket_max_size: int = 4,
    do_sample: bool = True,
    top_p: float = 0.8,
    top_k: int = 30,
    temperature: float = 1.0,
    length_penalty: float = 0.0,
    num_beams: int = 3,
    repetition_penalty: float = 10.0,
    max_mel_tokens: int = 600,
):
    """Single-shot inference endpoint: synthesize ``text`` into a WAV file.

    The audio is written to ``outputs/api_output_<COUNT>.wav``. ``COUNT`` is a
    module-level counter that is incremented on every request and reset to 0
    once it exceeds 1000, so older output files are overwritten.

    Args:
        text: Text to synthesize.
        infer_mode: ``"普通推理"`` selects ``tts.infer``; any other value
            selects ``tts.infer_fast``.
        max_text_tokens_per_sentence: Forwarded to the selected ``tts`` call.
        sentences_bucket_max_size: Forwarded to ``tts.infer_fast`` only.
        do_sample, top_p, temperature, length_penalty, num_beams,
        repetition_penalty, max_mel_tokens: Forwarded unchanged as keyword
            arguments to the selected ``tts`` call; their exact semantics are
            defined by that API.
        top_k: Forwarded as-is when positive, otherwise forwarded as ``None``.

    Returns:
        A ``FileResponse`` serving the generated file as ``audio/wav`` with
        ``Content-Disposition: inline`` and ``Cache-Control: no-cache``.

    Raises:
        HTTPException: Status 500 when the reference audio file is missing or
            when any step of the synthesis raises.
    """
    global COUNT

    try:
        # Make sure the output directory exists.
        os.makedirs("outputs", exist_ok=True)

        # Use a fixed local reference audio (uploading a prompt is disabled).
        # NOTE(review): absolute, machine-specific path — consider making it
        # configurable.
        prompt_path = r"G:\AAA_myIdea\indexTTS\index-tts\zh.wav"
        if not os.path.exists(prompt_path):
            # Report the failure with an error status instead of a 200 JSON
            # body, so clients expecting audio can detect it.
            raise HTTPException(
                status_code=500,
                detail=f"参考音频不存在:{prompt_path}",
            )

        # Pick the output slot; the counter wraps so files get overwritten.
        COUNT += 1
        if COUNT > 1000:
            COUNT = 0
        output_path = os.path.join("outputs", f"api_output_{COUNT}.wav")

        # Generation options shared by both inference modes.
        kwargs = {
            "do_sample": do_sample,
            "top_p": top_p,
            "top_k": top_k if top_k > 0 else None,
            "temperature": temperature,
            "length_penalty": length_penalty,
            "num_beams": num_beams,
            "repetition_penalty": repetition_penalty,
            "max_mel_tokens": max_mel_tokens,
        }

        # NOTE(review): the tts call runs synchronously inside an ``async``
        # handler; if it blocks, the event loop is stalled for the duration
        # of the synthesis — confirm this is acceptable.
        if infer_mode == "普通推理":
            tts.infer(
                prompt_path,
                text,
                output_path,
                max_text_tokens_per_sentence=max_text_tokens_per_sentence,
                **kwargs,
            )
        else:
            tts.infer_fast(
                prompt_path,
                text,
                output_path,
                max_text_tokens_per_sentence=max_text_tokens_per_sentence,
                sentences_bucket_max_size=sentences_bucket_max_size,
                **kwargs,
            )

        return FileResponse(
            output_path,
            media_type="audio/wav",
            headers={
                # "inline" lets the browser play the audio directly.
                "Content-Disposition": "inline",
                "Cache-Control": "no-cache",
            },
        )
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions