
Unable to pass jar file properly to pyspark script to connect to AlloyDB

Below is the code I have, which triggers when a file is uploaded to GCS. The jar file is not being passed properly to the script that connects to the database; please help. The code submits a PySpark script as a Dataproc Serverless (serverless Spark) batch. The code highlighted in red is throwing the error.

from google.cloud import dataproc_v1
from google.cloud.dataproc_v1 import Batch, PySparkBatch


def submit_job_serverless(data, context):
    # Regional endpoint is required for Dataproc Serverless batch submission
    client = dataproc_v1.BatchControllerClient(client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"})
    project_id = 'onyx-zodiac-xxxx13'
    region = 'us-central1'
    spark_script_path = 'gs://bucket/new_code_read_files.py'
    jar_file_uris = ['gs://bucket/postgresql-42.7.3.jar']

    # Define the PySpark batch job; jar_file_uris adds the JDBC driver jar to the
    # classpath of the Spark driver and executors
    batch = Batch(
        pyspark_batch=PySparkBatch(
            main_python_file_uri=spark_script_path,
            jar_file_uris=jar_file_uris,
        ),
        environment_config={
            'execution_config': {
                'service_account': 'xxxxx-compute@developer.gserviceaccount.com'
            }
        }
    )
   
    # Submit the batch job (an explicit batch_id can optionally be passed to create_batch)
    operation = client.create_batch(parent=f"projects/{project_id}/locations/{region}", batch=batch)
   
    # Wait for the batch job to complete
    response = operation.result()

    # Batch has no `reference` field; use the batch resource name instead
    batch_name = response.name

    # Print the batch job details
    print(f"Batch job finished with state: {response.state}")

    print(f"Submitted job to Dataproc Serverless with batch name {batch_name}")

    return f"Submitted job with batch name {batch_name}"