关于将百度的语音合成改为字节跳动的语音合成

我想问一下：麦克风那边是使用讯飞的语音转文字，然后把文字传给大模型，大模型再把文字传回来，最后使用百度的文字转语音（语音合成）生成音频进行播放。我想把百度的语音合成换成字节跳动的，于是修改了 bool Audio2::connecttospeech(const char *speech, const char *lang) 这个函数，以下是我的修改：
// 更新后的 connecttospeech 函数
bool Audio2::connecttospeech(const char *speech, const char *lang)
{
    xSemaphoreTakeRecursive(mutex_audio, portMAX_DELAY);

    setDefaults();
    
    // 修改为正确的域名和路径
    const char* host = "openspeech.bytedance.com";
    const char* path = "/api/v1/tts";
    
    // 构建 HTTP 请求头，包含必要的认证信息
    String headers = "Content-Type: application/json\r\n"
                    "Accept: audio/mp3\r\n"  // 明确指定接受 MP3 格式
                    "Authorization: Bearer your_token_here\r\n"  // 替换为实际的token
                    "Connection: keep-alive\r\n";

    // 构建 JSON 请求体
    StaticJsonDocument<1024> doc;
    
    JsonObject app = doc.createNestedObject("app");
    app["appid"] = "7224888888";
    app["token"] = "i0_c-jhtJlywGIKfOdUAUkXma5VDfM8d";
    app["cluster"] = "volcano_tts";
    
    JsonObject user = doc.createNestedObject("user");
    user["uid"] = "uid123";
    
    JsonObject audio = doc.createNestedObject("audio");
    audio["voice_type"] = "BV700_streaming";
    audio["encoding"] = "mp3";
    audio["compression_rate"] = 1;
    audio["rate"] = 24000;
    audio["speed_ratio"] = 1.0;
    audio["volume_ratio"] = 1.0;
    audio["pitch_ratio"] = 1.0;
    audio["emotion"] = "happy";
    audio["language"] = lang;
    
    JsonObject request = doc.createNestedObject("request");
    request["reqid"] = "123456";
    request["text"] = speech;
    request["text_type"] = "plain";
    request["operation"] = "query";
    request["silence_duration"] = "125";
    request["with_frontend"] = "1";
    request["frontend_type"] = "unitTson";
    request["pure_english_opt"] = "1";

    String jsonString;
    serializeJson(doc, jsonString);

    // 使用 WiFiClientSecure 进行 HTTPS 连接
    _client = static_cast<WiFiClient *>(&clientsecure);
    if (!_client->connect(host, 443))
    {
        log_e("Connection failed");
        xSemaphoreGiveRecursive(mutex_audio);
        return false;
    }

    // 发送 HTTP POST 请求
    String httpRequest = String("POST ") + path + " HTTP/1.1\r\n" +
                        "Host: " + host + "\r\n" +
                        headers +
                        "Content-Length: " + jsonString.length() + "\r\n" +
                        "\r\n" +
                        jsonString;

    if (!_client->print(httpRequest))
    {
        log_e("Failed to send request");
        _client->stop();
        xSemaphoreGiveRecursive(mutex_audio);
        return false;
    }

    m_streamType = ST_WEBFILE;
    isplaying = 1;
    m_f_running = true;
    m_f_ssl = true;
    m_f_tts = true;
    setDatamode(HTTP_RESPONSE_HEADER);
    
    xSemaphoreGiveRecursive(mutex_audio);
    return true;
}
// 更新后的 parseHttpResponseHeader 函数
bool Audio2::parseHttpResponseHeader()
{
    if (getDatamode() != HTTP_RESPONSE_HEADER)
        return false;
    if (!_client->available())
        return false;

    char rhl[512] = {0}; // responseHeaderline
    bool ct_seen = false;
    uint32_t ctime = millis();
    uint32_t timeout = 2500; // ms

    while (true)
    {
        uint16_t pos = 0;
        if ((millis() - ctime) > timeout)
        {
            log_e("Response header timeout");
            goto exit;
        }

        while (_client->available())
        {
            uint8_t b = _client->read();
            if (b == '\n')
            {
                if (!pos)
                { 
                    if (ct_seen)
                        goto lastToDo;
                    else
                        goto exit;
                }
                break;
            }
            if (b == '\r')
                rhl[pos] = 0;
            if (b < 0x20)
                continue;
            rhl[pos] = b;
            pos++;
            if (pos >= 511)
            {
                rhl[510] = '\0';
                if (m_f_Log)
                    log_i("Response header line overflow");
                break;
            }
        }

        if (!pos)
        {
            vTaskDelay(3);
            continue;
        }

        if (m_f_Log)
            log_i("HTTP Response Header: %s", rhl);

        // 转换冒号前的所有字母为小写
        int16_t posColon = indexOf(rhl, ":", 0);
        if (posColon >= 0)
        {
            for (int i = 0; i < posColon; i++)
                rhl[i] = toLowerCase(rhl[i]);
        }

        // 检查 HTTP 状态码
        if (startsWith(rhl, "HTTP/"))
        {
            char statusCode[5];
            statusCode[0] = rhl[9];
            statusCode[1] = rhl[10];
            statusCode[2] = rhl[11];
            statusCode[3] = '\0';
            int sc = atoi(statusCode);
            if (sc != 200)
            {
                log_e("HTTP Error: %d", sc);
                if (audio_showstreamtitle)
                    audio_showstreamtitle(rhl);
                goto exit;
            }
        }

        // 处理内容类型
        else if (startsWith(rhl, "content-type:"))
        {
            // 检查是否是 MP3 音频
            if (indexOf(rhl, "audio/mp3") > 0 || indexOf(rhl, "audio/mpeg") > 0)
            {
                ct_seen = true;
                m_codec = CODEC_MP3;
                setBitrate(128000); // 默认比特率
                setSampleRate(24000); // 字节跳动 TTS 的采样率
                setBitsPerSample(16);
                setChannels(2);
            }
            else
            {
                log_e("Unsupported content type: %s", rhl);
                goto exit;
            }
        }

        // 处理错误信息
        else if (startsWith(rhl, "x-error-code:") || startsWith(rhl, "x-error-message:"))
        {
            log_e("TTS Error: %s", rhl);
            goto exit;
        }
    }

lastToDo:
    if (!ct_seen)
    {
        log_e("No content type received");
        goto exit;
    }

    setDatamode(AUDIO_DATA);
    if (!initializeDecoder())
    {
        log_e("Failed to initialize decoder");
        goto exit;
    }

    if (m_f_Log)
        log_i("Switch to DATA mode, metaint is %d", m_metaint);

    if (m_playlistFormat != FORMAT_M3U8 && audio_lasthost)
        audio_lasthost(m_lastHost);

    m_controlCounter = 0;
    m_f_firstCall = true;
    return true;

exit:
    stopSong();
    return false;
}
可是无法正常运行：没有报错，但也没有语音回应。

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

关于将百度的语音合成改为字节跳动的语音合成 #17

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

关于将百度的语音合成改为字节跳动的语音合成 #17

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions