
Having an issue with data

Hello,
I'm a beginner trying to do a project. I used ChatGPT to generate a Python script that creates dummy employee data (name, address, SSN, password, etc.), loads it into a Cloud Storage bucket, and then uses the Data Fusion Wrangler to transform the data. Unfortunately, when I pulled the data from the bucket into Data Fusion, I saw that some fields are missing values and the last two columns are completely empty. Can anyone help me troubleshoot this problem?

Here is my python code:

import csv
from faker import Faker
from google.cloud import storage
import os

# Set Google Cloud project environment variable
os.environ['GOOGLE_CLOUD_PROJECT'] = 'marine-champion-432318-n3'

# Initialize Faker
fake = Faker()

# Generate dummy data
def generate_employee_data():
    data = {
        "first_name": fake.first_name(),
        "last_name": fake.last_name(),
        "email": fake.email(),
        "address": fake.address(),
        "phone_number": fake.phone_number(),
        "ssn": fake.ssn(),
        "date_of_birth": fake.date_of_birth(minimum_age=18, maximum_age=65).isoformat(),
        "password": fake.password(length=12, special_chars=True, digits=True, upper_case=True, lower_case=True)
    }
    print(data)  # Print generated data for debugging
    return data

# Save data to CSV
def save_to_csv(file_path, data_list):
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=data_list[0].keys())
        writer.writeheader()
        writer.writerows(data_list)
    print(f"Data saved to {file_path}")

# Upload file to GCS
def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    # Initialize a client
    storage_client = storage.Client(project='marine-champion-432318-n3')

    # Get the bucket
    bucket = storage_client.bucket(bucket_name)

    # Create a blob object
    blob = bucket.blob(destination_blob_name)

    # Upload the file
    blob.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

if __name__ == "__main__":
    # Generate a list of employee data
    employees = [generate_employee_data() for _ in range(10)]  # Adjust the number of records as needed

    # Define file paths
    csv_file_path = "employee_data.csv"

    # Save data to CSV
    save_to_csv(csv_file_path, employees)

    # Define GCS parameters
    bucket_name = "employee-project"  # Replace with your bucket name
    source_file_name = "employee_data.csv"
    destination_blob_name = "employee_data.csv"  # Blob name in GCS

    # Upload the file to GCS
    upload_to_gcs(bucket_name, source_file_name, destination_blob_name)
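
One detail that might matter: fake.address() returns a multi-line string (street on the first line, city/state/ZIP on the second). Python's csv module quotes such values correctly, but a parser that splits records on raw newlines could still misread them, which would match the symptom of missing values and empty trailing columns. Here is a minimal local check (a sketch, reusing the employee_data.csv path from the script above):

import csv

# Sketch: scan the generated CSV for field values that contain embedded
# newlines, which line-based CSV parsers may split into extra records.
with open("employee_data.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for idx, row in enumerate(reader, start=1):
        for field, value in row.items():
            if value and "\n" in value:
                print(f"record {idx}: field {field!r} contains an embedded newline: {value!r}")

If embedded newlines turn out to be the culprit, one possible workaround is to flatten the value when generating it, e.g. fake.address().replace("\n", ", "), but I'm not sure if that's the right approach.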


Here is a screenshot of the data viewed via the Wrangler in Data Fusion:
[screenshot: Asif_Shaharia_0-1724027536178.png]
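
In case it helps narrow things down, here is how the uploaded object can be read back from the bucket to check whether the file in GCS is already malformed or whether the problem only appears inside Data Fusion (a sketch, assuming the same project, bucket, and blob names as in the script):

import csv
import io
from google.cloud import storage

# Sketch: download the CSV back from GCS and verify that every record
# has the same number of fields as the header.
client = storage.Client(project="marine-champion-432318-n3")
blob = client.bucket("employee-project").blob("employee_data.csv")
content = blob.download_as_text(encoding="utf-8")

reader = csv.reader(io.StringIO(content))
header = next(reader)
print(f"header: {len(header)} columns -> {header}")
for idx, record in enumerate(reader, start=1):
    if len(record) != len(header):
        print(f"record {idx}: expected {len(header)} fields, got {len(record)}")

If this prints nothing beyond the header line, the object in the bucket is well-formed CSV and the issue is more likely in how Wrangler parses it.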

I didn't do anything in the GCP UI; I only used the Python script to load the data into the bucket. Please guide me.
