Summary of the Issue:
I am trying to integrate long-form synthesis into my app, and it keeps returning this error:
Google Long-form API error: {
"error": {
"code": 400,
"message": "Can't write to GCS uri `gs://XXXYYY-audio-output/tts-output-1749443435083.wav`. Make sure to follow the steps at https://cloud.google.com/text-to-speech/docs/create-audio-text-long-audio-synthesis#before-you-begin. Invalid authentication from policy (go/gcs-rpc-sp): Rejected by creds_policy: Permission 'auth.creds.useNormalUserEUC' not granted to cloud-ml-tts-frontend-prod-regional-jobs@prod.google.com, because it satisfies none of the 1 rules granting that permission.; RpcSecurityPolicy http://rpcsp/p/1hEJNqELrPdL_GAiNvmvq3Bhjsd04r_gQIKuyIT27SQ ",
"status": "INVALID_ARGUMENT"
}
}
This is probably because the service agent required by the Text-to-Speech API is not being created automatically. (I don't have an option to create a TTS service agent directly.)
Troubleshooting Steps Taken:
Confirmed our application's service account has all necessary Storage roles, including Service Usage Consumer and Service Account Token Creator.
Attempted to force-create the service agent using `gcloud beta services identity create --service=texttospeech.googleapis.com --project=long-form-synthesis`. This command consistently fails with the internal error `SU_INTERNAL_GENERATE_SERVICE_IDENTITY`.
Completely disabled and re-enabled the Text-to-Speech API for the project. The problem persists.
To rule out any project-specific misconfiguration, we created a brand new, clean project (long-form-synthesis). This new project exhibits the exact same SU_INTERNAL_GENERATE_SERVICE_IDENTITY error and failure to create the service agent.
Below is the code:
import { type NextRequest, NextResponse } from "next/server"
import fs from "fs"
import path from "path"
import * as jose from "jose"
// Google Cloud region for long-form synthesis. The value is used both as the
// host prefix of the regional Text-to-Speech endpoint and as the `locations/`
// segment of the request path.
const REGION = "us-central1" // You can change this to your preferred region
/**
 * Builds and signs the JWT assertion that is exchanged at Google's token
 * endpoint for an OAuth 2.0 access token (service-account JWT-bearer flow).
 *
 * The claim set contains only `iss`, `scope`, `aud`, `iat` and `exp`. The `sub`
 * claim is deliberately omitted: Google's server-to-server OAuth documentation
 * reserves it for domain-wide delegation (requesting access on behalf of a
 * user), which is not what a service account acting as itself needs.
 * NOTE(review): a delegated-style token may be related to downstream
 * "end user credential" rejections — confirm against the failing request.
 *
 * @param credentials - Parsed service-account key; only `client_email` and
 *   `private_key` are read.
 * @returns The compact, RS256-signed JWT.
 * @throws Propagates any error raised by `jose` while importing the key or
 *   signing the token.
 */
async function generateGoogleJwt(credentials: { client_email: string; private_key: string }): Promise<string> {
  const issuedAt = Math.floor(Date.now() / 1000)

  // Turn literal backslash-n sequences into real newlines so the PEM parses
  // even when the key was stored with escaped line breaks.
  const pem = credentials.private_key.replace(/\\n/g, "\n")

  const claims = {
    iss: credentials.client_email,
    aud: "https://oauth2.googleapis.com/token",
    iat: issuedAt,
    exp: issuedAt + 3600, // one hour after issuance
    scope: "https://www.googleapis.com/auth/cloud-platform",
  }

  const signingKey = await jose.importPKCS8(pem, "RS256")
  return new jose.SignJWT(claims).setProtectedHeader({ alg: "RS256" }).sign(signingKey)
}
/**
 * Exchanges a freshly signed service-account JWT for an OAuth 2.0 access token
 * by POSTing a JWT-bearer grant to Google's token endpoint.
 *
 * @param credentials - Parsed service-account key, forwarded to `generateGoogleJwt`.
 * @returns The `access_token` field of the token endpoint's JSON response.
 * @throws Error containing the HTTP status and response body when the token
 *   endpoint answers with a non-2xx status.
 */
async function getAccessToken(credentials: any) {
  const assertion = await generateGoogleJwt(credentials)

  // Form-encoded grant: the signed JWT is passed as the `assertion` field.
  const form = new URLSearchParams()
  form.set("grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer")
  form.set("assertion", assertion)

  const res = await fetch("https://oauth2.googleapis.com/token", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    body: form,
  })

  if (res.ok) {
    const { access_token: accessToken } = await res.json()
    return accessToken
  }

  const failureBody = await res.text()
  throw new Error(`Failed to get access token: ${res.status} ${failureBody}`)
}
/**
 * "Kickstart" helper: issues one tiny standard (non-long-form) synthesis
 * request. It is intended as a workaround to get the Text-to-Speech service
 * agent created for the project; the audio returned is discarded.
 *
 * @param token - OAuth 2.0 bearer token sent in the `Authorization` header.
 * @param projectId - Project ID sent in the `x-goog-user-project` header.
 * @returns `true` when the response contains `audioContent`, otherwise `false`.
 * @throws Error carrying the HTTP status when the API answers with a non-2xx status.
 */
async function triggerStandardSynthesis(token: string, projectId: string) {
  console.log("Attempting to 'kickstart' the API with a standard synthesis call...")

  // Smallest useful request: one short word, one fixed voice.
  const payload = JSON.stringify({
    input: { text: "hello" },
    voice: { languageCode: "en-US", name: "en-US-Neural2-A" },
    audioConfig: { audioEncoding: "MP3" },
  })

  const res = await fetch("https://texttospeech.googleapis.com/v1/text:synthesize", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      "x-goog-user-project": projectId,
    },
    body: payload,
  })

  if (res.ok) {
    // Only the presence of audio matters; the content itself is not used.
    const { audioContent } = await res.json()
    if (!audioContent) {
      return false
    }
    console.log("✅ SUCCESS: Standard API call successful. The service agent should now exist.")
    return true
  }

  const failureBody = await res.text()
  console.error("Standard synthesis trigger failed:", failureBody)
  throw new Error(`Failed to kickstart API. Status: ${res.status}`)
}
/**
 * Starts a long-audio synthesis operation on the regional v1beta1
 * `synthesizeLongAudio` endpoint and returns the resulting operation name.
 *
 * The audio is written to a timestamped object in the fixed bucket
 * `gs://XXXYYY-audio-output`.
 *
 * @param text - Plain text to synthesize.
 * @param voice - Currently ignored: the request always uses the fixed
 *   `en-US-Standard-A` voice. NOTE(review): confirm whether the caller's
 *   selection is meant to be honored.
 * @param token - OAuth 2.0 bearer token sent in the `Authorization` header.
 * @param audioEncoding - Encoding passed through to the API; it also selects
 *   the output extension ("MP3" → mp3, "LINEAR16" → wav, anything else → ogg).
 * @param projectId - Project ID used in the request path.
 * @returns The operation name from the API response.
 * @throws Error when the API answers with a non-2xx status, or when a
 *   successful response carries no operation name.
 */
async function startLongFormSynthesis(
  text: string,
  voice: any,
  token: string,
  audioEncoding = "LINEAR16",
  projectId: string,
): Promise<string> {
  // Timestamp keeps output object names distinct between requests.
  const timestamp = Date.now()
  const fileExtension = audioEncoding === "MP3" ? "mp3" : audioEncoding === "LINEAR16" ? "wav" : "ogg"
  const outputGcsUri = `gs://XXXYYY-audio-output/tts-output-${timestamp}.${fileExtension}`
  console.log(`Using output GCS URI: ${outputGcsUri}`)

  const requestBody = {
    input: { text },
    voice: {
      languageCode: "en-US",
      name: "en-US-Standard-A",
      ssmlGender: "FEMALE",
    },
    audioConfig: {
      audioEncoding: audioEncoding,
    },
    outputGcsUri: outputGcsUri,
  }

  // Regional host plus the full `projects/.../locations/...` parent path.
  const url = `https://${REGION}-texttospeech.googleapis.com/v1beta1/projects/${projectId}/locations/${REGION}:synthesizeLongAudio`
  console.log("Starting long-form synthesis with default voice (en-US-Standard-A)")
  console.log(`Using correct v1beta1 endpoint: ${url}`)

  const response = await fetch(url, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(requestBody),
  })

  if (!response.ok) {
    const errorText = await response.text()
    console.error("Google Long-form API error:", errorText)
    // Pretty-print JSON error bodies; fall back to the raw text otherwise.
    let errorDetails: string
    try {
      errorDetails = JSON.stringify(JSON.parse(errorText), null, 2)
    } catch {
      errorDetails = errorText
    }
    throw new Error(`Google Long-form API Error (${response.status}) from ${REGION}: ${errorDetails}`)
  }

  const responseData = await response.json()
  const operationName: unknown = responseData?.name
  // Without an operation name the caller has nothing to track, so a 2xx
  // response that lacks it is treated as a failure rather than returned as
  // `undefined`.
  if (typeof operationName !== "string" || operationName.length === 0) {
    throw new Error(
      `Google Long-form API returned no operation name from ${REGION}: ${JSON.stringify(responseData)}`,
    )
  }
  return operationName
}
/**
 * Route handler that starts a long-form text-to-speech operation.
 *
 * Expects a JSON object body with `text` (required string) and optional
 * `voice` and `audioEncoding` (string, defaults to "LINEAR16"). Service-account
 * credentials are read from `long-form-credentials.json` in the process
 * working directory.
 *
 * Responses:
 * - 400 when the body is not a JSON object, `text` is missing or not a
 *   string, or `audioEncoding` is not a string.
 * - 500 when the credentials file is missing or invalid, or any downstream
 *   call fails.
 * - 200 with the operation name, region and project ID on success.
 */
export async function POST(request: NextRequest) {
  console.log("Long-form text-to-speech API route called")
  try {
    // A malformed body is the client's mistake, so answer 400 instead of
    // letting the parse error fall through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: "Request body must be valid JSON" }, { status: 400 })
    }
    if (typeof body !== "object" || body === null) {
      return NextResponse.json({ error: "Request body must be a JSON object" }, { status: 400 })
    }

    const { text, voice, audioEncoding = "LINEAR16" } = body as Record<string, unknown>
    if (typeof text !== "string" || text.length === 0) {
      return NextResponse.json({ error: "Text is required" }, { status: 400 })
    }
    if (typeof audioEncoding !== "string") {
      return NextResponse.json({ error: "audioEncoding must be a string" }, { status: 400 })
    }

    // Check if credentials file exists
    const credentialsPath = path.join(process.cwd(), "long-form-credentials.json")
    if (!fs.existsSync(credentialsPath)) {
      return NextResponse.json({ error: "Long-form credentials file not found" }, { status: 500 })
    }

    // Read and validate the credentials file
    const credentials = JSON.parse(fs.readFileSync(credentialsPath, "utf8"))
    if (!credentials.private_key || !credentials.client_email || !credentials.project_id) {
      throw new Error("Credentials file is missing required fields (private_key, client_email, project_id)")
    }
    // project_id was validated just above, so no second check is needed.
    const projectId: string = credentials.project_id

    // Generate access token for authentication
    const token = await getAccessToken(credentials)
    console.log("Access token generated successfully for long-form synthesis")

    // TEMPORARY: make one standard synthesis call first as an attempt to get
    // the service agent created. A failure here aborts the request.
    await triggerStandardSynthesis(token, projectId)

    // Start long-form synthesis with the project ID from the credentials
    const operationName = await startLongFormSynthesis(text, voice, token, audioEncoding, projectId)
    console.log("Long-form synthesis started with operation:", operationName)

    return NextResponse.json({
      success: true,
      operationName: operationName,
      region: REGION,
      projectId: projectId,
      message: "Long-form synthesis started successfully",
    })
  } catch (error) {
    console.error("Error in long-form text-to-speech:", error)
    return NextResponse.json(
      {
        error: "Failed to start long-form synthesis",
        details: error instanceof Error ? error.message : "Unknown error",
      },
      { status: 500 },
    )
  }
}