Cloud Speech-to-Text V2

hsuan8169 · 06-19-2024 02:01 PM

I use API, Cloud Speech-to-Text V2 from Google Cloud Platform. I set the environment using JSON file. I want to transcribe an audio file from local to text. When I execute the code, it doesn't show error but shows a lot of encodes. How to revise it?

import os
import asyncio
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech as speech
from google.protobuf.json_format import MessageToDict

# My credential file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../config/google_credential.json'

async def transcribe_audio(local_file_path):
    
    client = SpeechClient()

    with open(local_file_path, "rb") as audio_file:
        content = audio_file.read()
    
    # recognize how many speakers
    diarization_config = speech.SpeakerDiarizationConfig(
        min_speaker_count=2,
        max_speaker_count=6
    )
    features = speech.RecognitionFeatures(
        diarization_config=diarization_config
    )
    
    # Recognition set.
    config = speech.RecognitionConfig(
        auto_decoding_config=speech.AutoDetectDecodingConfig(),
        features=features,
        language_codes=["en-US"],
        model="long",
    )
    request = speech.RecognizeRequest(
        config=config,
        content=content
    )
    operation = await client.batch_recognize(request=request)
    response = await operation.result()

    # File result.
    for result in response.results:
        for alternative in result.alternatives:
            print(f"Transcript: {alternative.transcript}")
            print(f"Confidence: {alternative.confidence}")
            for word in alternative.words:
                print(f"Word: {word.word}, Speaker: {word.speaker_tag}")

    response_dict = MessageToDict(response)

    with open("transcript.txt", "w") as transcript_file:
        for result in response_dict['results']:
            for alternative in result['alternatives']:
                transcript_file.write(f"Transcript: {alternative['transcript']}\n")
                transcript_file.write(f"Confidence: {alternative['confidence']}\n")
                for word in alternative['words']:
                    transcript_file.write(f"Word: {word['word']}, Speaker: {word['speakerTag']}\n")

# My file.
local_file_path = "../voice/Chicago.mp3"
asyncio.run(transcribe_audio(local_file_path))

Thank you.