Get hands-on experience with 20+ free Google Cloud products and $300 in free credit for new customers.

Error in GCP speech to text v2 with chirp-2 model

Hi!
I want to build a speech-recognition service for the Wolof language.

I found the model and location in the documentation:

https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
location: us-central1, language: Wolof (Senegal), language code: wo-SN, model: chirp_2

Then I wrote some C++ code (this is not production code):

// PortAudio capture parameters. These must agree with the ExplicitDecodingConfig
// sent to the Speech API (LINEAR16, 16 kHz, mono) in the streaming fragment below.
constexpr int SAMPLE_RATE = 16000;               // 16 kHz sampling rate
constexpr int FRAMES_PER_BUFFER = 4160;          // frames per Pa_ReadStream call (~260 ms of audio at 16 kHz)
constexpr int NUM_CHANNELS = 1;                  // mono capture
constexpr PaSampleFormat SAMPLE_FORMAT = paInt16; // 16-bit signed PCM == LINEAR16

// Smoke test for Speech-to-Text v2: connect to the regional endpoint,
// create (or reuse) a Wolof chirp_2 recognizer, then stream microphone
// audio to it. Any failure is reported on stderr.
void
test_google_speech() {
    // The endpoint host must match the recognizer's location (us-central1).
    const std::string server_address = "us-central1-speech.googleapis.com";
    const std::string location = "us-central1";
    const std::string language = "wo-SN";   // Wolof (Senegal)
    const std::string model = "chirp_2";
    const std::string credentials_file = KEY_PATH;

    // load_credentials also extracts the project id from the key file.
    std::string project_id;
    auto channel_creds = load_credentials(credentials_file, project_id);
    auto stub = google::cloud::speech::v2::Speech::NewStub(
        grpc::CreateChannel(server_address, channel_creds));

    try {
        const std::string recognizer_name =
            create_recognizer(stub.get(), project_id, location, language, model);
        std::cout << "Recognizer name:" << recognizer_name << std::endl;
        record_and_send_google(stub.get(), recognizer_name);
    } catch (const std::exception &e) {
        std::cerr << "Error: " << e.what() << std::endl;
    }
}


First, I create the recognizer:

// Fragment: body of create_recognizer(stub, project_id, location, language, model).
grpc::ClientContext context;
google::cloud::speech::v2::CreateRecognizerRequest request;


// Fixed recognizer id; the full resource name has the form
// projects/{project}/locations/{location}/recognizers/{id}.
std::string recognizer_id = "testwo";

std::string parent = "projects/" + project_id + "/locations/" + location;

std::string recognizer_name = parent + "/recognizers/" + recognizer_id;

// Reuse an existing recognizer instead of failing with ALREADY_EXISTS.
if (check_recognizer_exists(stub, recognizer_name)) {
return recognizer_name;
}
request.set_parent(parent);
request.set_recognizer_id(recognizer_id);

// Recognizer configuration: language and model are fixed at creation time.
google::cloud::speech::v2::Recognizer recognizer;
recognizer.add_language_codes(language);
std::cout<<"Language:"<<language.c_str()<<std::endl;
recognizer.set_display_name("Recognizer for " + language);
recognizer.set_model(model);
request.mutable_recognizer()->CopyFrom(recognizer);


// NOTE(review): CreateRecognizer is a long-running operation. The returned
// Operation is never polled for completion and `status` is not checked here,
// so the recognizer may not be ready (or creation may have failed) when it is
// first used — confirm the operation is awaited before streaming.
google::longrunning::Operation operation;
grpc::Status status = stub->CreateRecognizer(&context, request, &operation);



Then I use it:

// Fragment: body of record_and_send_google(stub, recognizer_name).
// Opens a bidirectional StreamingRecognize stream, sends one config message
// followed by raw PCM chunks, and reads results on a background thread.
grpc::ClientContext context;

// First message on the stream must carry the streaming configuration.
google::cloud::speech::v2::StreamingRecognizeRequest config_request;
config_request.set_recognizer(recognizer_name);

auto *streaming_config = config_request.mutable_streaming_config();
auto *recognition_config = streaming_config->mutable_config();
// Explicit decoding: raw 16-bit PCM, 16 kHz, mono — matches the PortAudio
// capture constants above. Language and model are inherited from the recognizer.
recognition_config->mutable_explicit_decoding_config()->set_encoding(google::cloud::speech::v2::ExplicitDecodingConfig::LINEAR16);
recognition_config->mutable_explicit_decoding_config()->set_sample_rate_hertz(SAMPLE_RATE);
recognition_config->mutable_explicit_decoding_config()->set_audio_channel_count(NUM_CHANNELS);
auto *streamingFeatures = streaming_config->mutable_streaming_features();
streamingFeatures->set_interim_results(true);
auto grpc_stream = stub->StreamingRecognize(&context);
if (!grpc_stream->Write(config_request)) {
throw std::runtime_error("Failed to send config request");
}
// Reader thread: prints interim results and stops after the first final one.
std::atomic<bool> stop_recognition(false);
std::thread response_thread([&]() {
google::cloud::speech::v2::StreamingRecognizeResponse response;
while (grpc_stream->Read(&response)) {
for (const auto &result : response.results()) {
if (result.is_final()) {
std::string transcript;
for (const auto &alternative : result.alternatives()) {
transcript += alternative.transcript();
}
std::cout << "Recognized (final): " << transcript << std::endl;
// Signal the capture loop to stop; remaining responses are discarded.
stop_recognition.store(true);
return;
} else {
std::string interim;
for (const auto &alternative : result.alternatives()) {
interim += alternative.transcript();
}
std::cout << "Recognized (interim): " << interim << std::endl;
}
}
}
});

// Capture loop: read microphone frames and forward them as audio messages.
// NOTE(review): `err` and `audio_stream` are declared outside this fragment.
// Pa_ReadStream blocks until FRAMES_PER_BUFFER frames are available, so
// sending sizeof(buffer) bytes each iteration appears intentional here.
int16_t buffer[FRAMES_PER_BUFFER];
while (!stop_recognition.load()) {
err = Pa_ReadStream(audio_stream, buffer, FRAMES_PER_BUFFER);
if (err && err != paInputOverflowed) {
throw std::runtime_error("PortAudio error: " + std::string(Pa_GetErrorText(err)));
}

google::cloud::speech::v2::StreamingRecognizeRequest audio_request;
audio_request.set_audio(std::string(reinterpret_cast<const char *>(buffer), sizeof(buffer)));
std::cout<<"send audio: "<<sizeof(buffer)<<std::endl;
if (!grpc_stream->Write(audio_request)) {
break;
}
}

// Half-close the stream, wait for the reader, then surface the final status.
grpc_stream->WritesDone();
response_thread.join();

grpc::Status status = grpc_stream->Finish();
if (!status.ok()) {
std::cerr << "Error code: " << status.error_code() << "\n"
<< "Error details: " << status.error_details() << "\n"
<< "Error message: " << status.error_message() << std::endl;
throw std::runtime_error("gRPC call failed ffff: " + status.error_message());
}



But for all languages except English I receive something like this:


Error code: 3
Error details:
Error message: StreamingRecognize does not support the "wo-SN" language code for the "chirp_2" model at location "us-central1".


But I see these languages and models listed here:
https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages

I can see the created recognizers in the console.
I also see google.cloud.speech.v2.Speech.StreamingRecognize requests in the "APIs & Services" details, but the error rate is 90%.
Everything works only with the English language.
How can I solve this problem with Speech API v2?
0 1 1,536
1 REPLY 1