Get hands-on experience with 20+ free Google Cloud products and $300 in free credit for new customers.

Error in GCP speech to text v2 with chirp-2 model

Hi!
I want to build a speech-recognition service for the Wolof language.

I found the model and location in the documentation:

https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
location: us-central1, language: Wolof (Senegal), language code: wo-SN, model: chirp_2

Then I wrote some C++ code (this is not production code):

// PortAudio capture parameters. These must agree with the ExplicitDecodingConfig
// sent to the Speech API (LINEAR16, 16 kHz, mono) in the streaming fragment below.
constexpr int SAMPLE_RATE = 16000;               // 16 kHz sampling rate
constexpr int FRAMES_PER_BUFFER = 4160;          // frames per Pa_ReadStream call (~260 ms of audio at 16 kHz)
constexpr int NUM_CHANNELS = 1;                  // mono capture
constexpr PaSampleFormat SAMPLE_FORMAT = paInt16; // 16-bit signed PCM == LINEAR16

// Smoke test for Speech-to-Text v2: connect to the regional endpoint,
// create (or reuse) a Wolof chirp_2 recognizer, then stream microphone
// audio to it. Any failure is reported on stderr.
void
test_google_speech() {
    // The endpoint host must match the recognizer's location (us-central1).
    const std::string server_address = "us-central1-speech.googleapis.com";
    const std::string location = "us-central1";
    const std::string language = "wo-SN";   // Wolof (Senegal)
    const std::string model = "chirp_2";
    const std::string credentials_file = KEY_PATH;

    // load_credentials also extracts the project id from the key file.
    std::string project_id;
    auto channel_creds = load_credentials(credentials_file, project_id);
    auto stub = google::cloud::speech::v2::Speech::NewStub(
        grpc::CreateChannel(server_address, channel_creds));

    try {
        const std::string recognizer_name =
            create_recognizer(stub.get(), project_id, location, language, model);
        std::cout << "Recognizer name:" << recognizer_name << std::endl;
        record_and_send_google(stub.get(), recognizer_name);
    } catch (const std::exception &e) {
        std::cerr << "Error: " << e.what() << std::endl;
    }
}


First, I create the recognizer:

// Fragment: body of create_recognizer(stub, project_id, location, language, model).
grpc::ClientContext context;
google::cloud::speech::v2::CreateRecognizerRequest request;


// Fixed recognizer id; the full resource name has the form
// projects/{project}/locations/{location}/recognizers/{id}.
std::string recognizer_id = "testwo";

std::string parent = "projects/" + project_id + "/locations/" + location;

std::string recognizer_name = parent + "/recognizers/" + recognizer_id;

// Reuse an existing recognizer instead of failing with ALREADY_EXISTS.
if (check_recognizer_exists(stub, recognizer_name)) {
return recognizer_name;
}
request.set_parent(parent);
request.set_recognizer_id(recognizer_id);

// Recognizer configuration: language and model are fixed at creation time.
google::cloud::speech::v2::Recognizer recognizer;
recognizer.add_language_codes(language);
std::cout<<"Language:"<<language.c_str()<<std::endl;
recognizer.set_display_name("Recognizer for " + language);
recognizer.set_model(model);
request.mutable_recognizer()->CopyFrom(recognizer);


// NOTE(review): CreateRecognizer is a long-running operation. The returned
// Operation is never polled for completion and `status` is not checked here,
// so the recognizer may not be ready (or creation may have failed) when it is
// first used — confirm the operation is awaited before streaming.
google::longrunning::Operation operation;
grpc::Status status = stub->CreateRecognizer(&context, request, &operation);



Then I use it:

// Fragment: body of record_and_send_google(stub, recognizer_name).
// Opens a bidirectional StreamingRecognize stream, sends one config message
// followed by raw PCM chunks, and reads results on a background thread.
grpc::ClientContext context;

// First message on the stream must carry the streaming configuration.
google::cloud::speech::v2::StreamingRecognizeRequest config_request;
config_request.set_recognizer(recognizer_name);

auto *streaming_config = config_request.mutable_streaming_config();
auto *recognition_config = streaming_config->mutable_config();
// Explicit decoding: raw 16-bit PCM, 16 kHz, mono — matches the PortAudio
// capture constants above. Language and model are inherited from the recognizer.
recognition_config->mutable_explicit_decoding_config()->set_encoding(google::cloud::speech::v2::ExplicitDecodingConfig::LINEAR16);
recognition_config->mutable_explicit_decoding_config()->set_sample_rate_hertz(SAMPLE_RATE);
recognition_config->mutable_explicit_decoding_config()->set_audio_channel_count(NUM_CHANNELS);
auto *streamingFeatures = streaming_config->mutable_streaming_features();
streamingFeatures->set_interim_results(true);
auto grpc_stream = stub->StreamingRecognize(&context);
if (!grpc_stream->Write(config_request)) {
throw std::runtime_error("Failed to send config request");
}
// Reader thread: prints interim results and stops after the first final one.
std::atomic<bool> stop_recognition(false);
std::thread response_thread([&]() {
google::cloud::speech::v2::StreamingRecognizeResponse response;
while (grpc_stream->Read(&response)) {
for (const auto &result : response.results()) {
if (result.is_final()) {
std::string transcript;
for (const auto &alternative : result.alternatives()) {
transcript += alternative.transcript();
}
std::cout << "Recognized (final): " << transcript << std::endl;
// Signal the capture loop to stop; remaining responses are discarded.
stop_recognition.store(true);
return;
} else {
std::string interim;
for (const auto &alternative : result.alternatives()) {
interim += alternative.transcript();
}
std::cout << "Recognized (interim): " << interim << std::endl;
}
}
}
});

// Capture loop: read microphone frames and forward them as audio messages.
// NOTE(review): `err` and `audio_stream` are declared outside this fragment.
// Pa_ReadStream blocks until FRAMES_PER_BUFFER frames are available, so
// sending sizeof(buffer) bytes each iteration appears intentional here.
int16_t buffer[FRAMES_PER_BUFFER];
while (!stop_recognition.load()) {
err = Pa_ReadStream(audio_stream, buffer, FRAMES_PER_BUFFER);
if (err && err != paInputOverflowed) {
throw std::runtime_error("PortAudio error: " + std::string(Pa_GetErrorText(err)));
}

google::cloud::speech::v2::StreamingRecognizeRequest audio_request;
audio_request.set_audio(std::string(reinterpret_cast<const char *>(buffer), sizeof(buffer)));
std::cout<<"send audio: "<<sizeof(buffer)<<std::endl;
if (!grpc_stream->Write(audio_request)) {
break;
}
}

// Half-close the stream, wait for the reader, then surface the final status.
grpc_stream->WritesDone();
response_thread.join();

grpc::Status status = grpc_stream->Finish();
if (!status.ok()) {
std::cerr << "Error code: " << status.error_code() << "\n"
<< "Error details: " << status.error_details() << "\n"
<< "Error message: " << status.error_message() << std::endl;
throw std::runtime_error("gRPC call failed ffff: " + status.error_message());
}



But for all languages except English I receive something like this:


Error code: 3
Error details:
Error message: StreamingRecognize does not support the "wo-SN" language code for the "chirp_2" model at location "us-central1".


But I see these languages and models listed here:
https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages

I can see the created recognizers in the console.
I also see google.cloud.speech.v2.Speech.StreamingRecognize requests in the "APIs & Services" details, but the error rate is 90%.
Everything works only with the English language.
How can I solve this problem with Speech API v2?
0 1 1,536
1 REPLY 1