gemini texttospeech model - Page 2

fengye

Hi, I was testing the gemini-2.5-flash-preview-tts model in a Vertex AI Workbench instance but got an error. Other models, such as gemini-2.0-flash, work fine in the same instance. Any suggestions on how to fix this, or is any additional setup needed in GCP?

Error: multi_speaker_voice_config parameter is not supported in Vertex AI

The script used is as follows:

from google import genai
from google.genai import types
import wave

# Set up the wave file to save the output:
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write the audio bytes in ``pcm`` to ``filename`` as a WAV file.

    Args:
        filename: Destination handed to ``wave.open`` in write mode.
        pcm: Frame data, written verbatim after the header.
        channels: Number of audio channels (default 1).
        rate: Frame rate in Hz (default 24000).
        sample_width: Bytes per sample (default 2).
    """
    # setparams takes (nchannels, sampwidth, framerate, nframes, comptype,
    # compname). nframes starts at 0; the writer corrects the frame count
    # in the header once the data has been written.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)


# Instruction sent to the model ahead of the conversation payload.
prompt = """TTS the following conversation between Aaron and Alice as a podcast."""

# The client is created in Vertex AI mode and authenticates with an API key.
# The alternative tried earlier was genai.Client(vertexai=True,
# project=project, location=location).
genai_client = genai.Client(vertexai=True, api_key=MY_API_KEY)

# (speaker label, prebuilt voice name) pairs; order is preserved in the request.
speaker_voices = (
    ("Aaron", "Kore"),
    ("Alice", "Puck"),
)

# One SpeakerVoiceConfig per speaker, each pointing at a prebuilt voice.
speaker_voice_configs = [
    types.SpeakerVoiceConfig(
        speaker=speaker_name,
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name=voice_name,
            ),
        ),
    )
    for speaker_name, voice_name in speaker_voices
]

multi_speaker_config = types.MultiSpeakerVoiceConfig(
    speaker_voice_configs=speaker_voice_configs,
)

# Request audio output only, rendered with the multi-speaker voices above.
generation_config = types.GenerateContentConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        multi_speaker_voice_config=multi_speaker_config,
    ),
)

response = genai_client.models.generate_content(
    model="gemini-2.5-flash-preview-tts",
    contents=[prompt, input_json],
    config=generation_config,
)

# The audio bytes are read from the inline data of the first part of the
# first candidate (presumably raw PCM, since they go straight to wave_file).
first_part = response.candidates[0].content.parts[0]
data = first_part.inline_data.data

file_name = 'out.wav'
wave_file(file_name, data)