Below is the code I have; it triggers on a file upload to GCS and submits a PySpark script as a batch to Dataproc Serverless (serverless Spark). The JAR file is not being passed through to the script properly, and the script needs it to connect to the database. Please help. The code I highlighted in red is the part that throws the error.
from google.cloud import dataproc_v1
from google.cloud.dataproc_v1 import Batch, PySparkBatch
import os
def submit_job_serverless(data, context):
    client = dataproc_v1.BatchControllerClient(
        client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
    )
    project_id = 'onyx-zodiac-xxxx13'
    region = 'us-central1'
    spark_script_path = 'gs://bucket/new_code_read_files.py'
    jar_file_uri = ['gs://bucket/postgresql-42.7.3.jar']

    # Define the PySpark batch job
    batch = Batch(
        pyspark_batch=PySparkBatch(
            {
                "main_python_file_uri": spark_script_path,
                "jar_file_uris": jar_file_uri,
            }
        ),
        environment_config={
            'execution_config': {
                'service_account': 'xxxxx-compute@developer.gserviceaccount.com'
            }
        },
    )
    # Submit the batch job (a batch_id can also be passed; the service generates one if it is omitted)
    operation = client.create_batch(
        parent=f"projects/{project_id}/locations/{region}", batch=batch
    )

    # Wait for the batch job to complete
    response = operation.result()
    # The result is a Batch, which has no reference.job_id; use its resource name instead
    batch_name = response.name

    # Print the batch job details
    print(f"Batch job finished with state: {response.state}")
    print(f"Submitted job to Dataproc Serverless with batch {batch_name}")
    return f"Submitted job with batch {batch_name}"