I am trying to create a job through Python; this is my creation script:
def submit_download_sra_fastq(sra_batch_file, batch_name, project_id, region, job_name, docker_image, cpus, memory_mib,
                              machine_type, max_retry_count, run_minutes, parallelism,
                              boot_disk_mib=None):
    """Submit a Google Cloud Batch job that downloads SRA FASTQ files.

    Copies the local SRA accession list to GCS, then creates a Batch job with
    one task per line of the batch file; each task runs the container and
    downloads the accession selected by its $BATCH_TASK_INDEX.

    Args:
        sra_batch_file: Local path to a text file, one SRA accession per line.
        batch_name: Basename for the GCS copy of the batch file; when None a
            timestamp-derived name is generated instead.
        project_id: GCP project the job is created in.
        region: GCP region; the job parent is projects/{project_id}/locations/{region}.
        job_name: Batch job ID.
        docker_image: Container image URI each task runs.
        cpus: Whole CPUs requested per task.
        memory_mib: Memory per task, in MiB.
        machine_type: Compute Engine machine type for the worker VMs.
        max_retry_count: Per-task retry limit.
        run_minutes: Per-task maximum runtime, in minutes.
        parallelism: Maximum number of tasks running concurrently.
        boot_disk_mib: Optional boot-disk size in MiB. The default boot disk
            can be too small for the image plus disk-formatting work, which
            surfaces as "mount: ... wrong fs type, bad option, bad superblock"
            when Batch tries to mount the attached disk; pass a larger value
            (e.g. 102400) to avoid that.

    Raises:
        subprocess.CalledProcessError: If the gsutil upload of the batch
            file fails.
    """
    # One Batch task per accession: count the lines in the input file.
    with open(sra_batch_file) as infile:
        sra_count = sum(1 for _ in infile)

    # Stage the accession list in GCS so every task VM can read it.
    if batch_name is None:
        gcs_batch_file = f'gs://sra-fastq/batches/sra-batch{int(time.time())}.txt'
    else:
        gcs_batch_file = f'gs://sra-fastq/batches/{batch_name}.txt'
    # Argument list (no shell-string splitting, so names with spaces survive)
    # and check=True so a failed upload aborts the submission instead of
    # silently creating a job whose tasks can't find their input.
    subprocess.run(['gsutil', 'cp', sra_batch_file, gcs_batch_file], check=True)

    client = batch_v1.BatchServiceClient()

    # Each task runs the container, bind-mounting the attached scratch disk.
    runnable = batch_v1.Runnable()
    runnable.container = batch_v1.Runnable.Container()
    runnable.container.image_uri = docker_image
    runnable.container.options = '--mount type=bind,src=/mnt/disks/sra-download/,target=/sra-download'
    runnable.container.entrypoint = "/bin/sh"
    # $BATCH_TASK_INDEX selects which line of the batch file this task handles.
    runnable.container.commands = ["-c", f"hsubatch download-sra-fastq {gcs_batch_file} $BATCH_TASK_INDEX"]

    task = batch_v1.TaskSpec()
    task.runnables = [runnable]

    # Host-side mount of the attached disk; device_name must match the
    # AttachedDisk.device_name set in the allocation policy below.
    volume = batch_v1.Volume()
    volume.device_name = 'sra-download'
    volume.mount_path = "/mnt/disks/sra-download/"
    volume.mount_options = "rw,async"
    task.volumes = [volume]

    # Per-task resource requests.
    resources = batch_v1.ComputeResource()
    resources.cpu_milli = cpus * 1000  # milliCPU: 1000 == one whole CPU
    resources.memory_mib = memory_mib  # in MiB
    if boot_disk_mib is not None:
        # Enlarge the boot disk (see docstring): the default size can be
        # insufficient and breaks the attached-disk mount.
        resources.boot_disk_mib = boot_disk_mib
    task.compute_resource = resources
    task.max_retry_count = max_retry_count
    task.max_run_duration = f"{run_minutes * 60}s"

    # All tasks live in a single TaskGroup (currently the only option).
    group = batch_v1.TaskGroup()
    group.task_spec = task
    group.task_count = sra_count
    group.parallelism = parallelism

    # VM shape: requested machine type on SPOT provisioning, with a 100 GB
    # pd-ssd scratch disk attached for the downloads.
    policy = batch_v1.AllocationPolicy.InstancePolicy()
    policy.machine_type = machine_type
    policy.provisioning_model = "SPOT"
    newdisk = batch_v1.AllocationPolicy.AttachedDisk()
    newdisk.new_disk.size_gb = 100
    newdisk.new_disk.type_ = 'pd-ssd'
    newdisk.device_name = 'sra-download'
    policy.disks = [newdisk]
    instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
    instances.policy = policy
    allocation_policy = batch_v1.AllocationPolicy()
    allocation_policy.instances = [instances]

    job = batch_v1.Job()
    job.task_groups = [group]
    job.allocation_policy = allocation_policy
    job.labels = {"env": "testing", "type": "container"}
    # Cloud Logging is the out-of-the-box log destination for Batch.
    job.logs_policy = batch_v1.LogsPolicy()
    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING

    create_request = batch_v1.CreateJobRequest()
    create_request.job = job
    create_request.job_id = job_name
    # The job's parent is the region in which the job will run.
    create_request.parent = f"projects/{project_id}/locations/{region}"
    client.create_job(create_request)
    print(f"Created job: {job_name}")
You can see that I am trying to mount a new disk to this instance, and then run a docker image that bind mounts to that new disk. I get these errors when I run the command:
ERROR 2023-01-12T07:21:24.898106623Z mke2fs 1.46.5 (30-Dec-2021)
INFO 2023-01-12T07:21:24.901911705Z Discarding device blocks: 0/26214400
INFO 2023-01-12T07:21:24.966901031Z done
INFO 2023-01-12T07:21:24.967126864Z Creating filesystem with 26214400 4k blocks and 6553600 inodes Filesystem UUID: a26b88d9-e73b-4a34-ba38-5b11c9b6f996 Superblock backups stored on blocks: 32768, 98304, 163840, 229376, 294912
INFO 2023-01-12T07:21:24.967145137Z , 819200, 884736, 1605632, 2654208, 4096000
INFO 2023-01-12T07:21:24.967153330Z , 7962624,
INFO 2023-01-12T07:21:24.967160683Z 11239424
INFO 2023-01-12T07:21:24.967173601Z , 20480000
INFO 2023-01-12T07:21:24.967189740Z , 23887872 Allocating group tables: 0/800
INFO 2023-01-12T07:21:24.968106547Z done
INFO 2023-01-12T07:21:24.968778284Z Writing inode tables: 0/800
INFO 2023-01-12T07:21:25.002198029Z 68/800
INFO 2023-01-12T07:21:25.174615487Z f927477c601c: Pull complete
INFO 2023-01-12T07:21:25.245055966Z 7eb0e5940f70: Pull complete
INFO 2023-01-12T07:21:25.257080266Z Digest: sha256:3be61617119fbccfd39a8b892debe155abb6e2d96ba7dcd1f1f911b24847a619
INFO 2023-01-12T07:21:25.262943714Z Status: Downloaded newer image for gcr.io/mdurrant/hsulab-gcp-batch:latest
INFO 2023-01-12T07:21:25.266026529Z gcr.io/mdurrant/hsulab-gcp-batch:latest
ERROR 2023-01-12T07:21:25.278811140Z mke2fs 1.46.5 (30-Dec-2021)
INFO 2023-01-12T07:21:25.282597747Z Discarding device blocks: 0/26214400
INFO 2023-01-12T07:21:25.342568710Z done
INFO 2023-01-12T07:21:25.342952775Z Creating filesystem with 26214400 4k blocks and 6553600 inodes Filesystem UUID: 572c3a05-08af-4efb-9e14-24833c696273 Superblock backups stored on blocks: 32768, 98304, 163840, 229376, 294912, 819200, 884736,
INFO 2023-01-12T07:21:25.342978304Z 1605632, 2654208, 4096000, 7962624, 11239424, 20480000, 23887872
INFO 2023-01-12T07:21:25.342985559Z Allocating group tables:
INFO 2023-01-12T07:21:25.342992886Z 0/800
INFO 2023-01-12T07:21:25.343864852Z done
INFO 2023-01-12T07:21:25.344516947Z Writing inode tables: 0/800
INFO 2023-01-12T07:21:25.377867873Z done
INFO 2023-01-12T07:21:25.382698785Z Creating journal (131072 blocks):
INFO 2023-01-12T07:21:25.384877891Z done Writing superblocks and filesystem accounting information:
INFO 2023-01-12T07:21:25.385367312Z 0/800
INFO 2023-01-12T07:21:25.387779978Z
INFO 2023-01-12T07:21:25.395778084Z done
ERROR 2023-01-12T07:21:25.497736569Z mount: /mnt/disks/sra-download: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error.
Looks like there is an issue with mounting the new disk, any idea what is going wrong?
Solved! Go to Solution.
Hi mgdurrant,
A larger boot disk can be configured either via an instance template, or by setting a larger boot disk size with boot_disk_mib in the job's compute resources.
Please let us know if it works. Thanks!
Wen