We've started using Google Cloud Batch to test some ETL processes. About 19 hours into a run, the job appeared to be shut down automatically. It's not clear to me what triggered this shutdown, other than that the instances were in a GCE managed instance group, which the audit log entry below shows being deleted by the Batch service account. There's no documentation on this instance group and I cannot locate it in the console. Any suggestions on how to launch the instances so that they are either not preemptible (if that's what happened) or not subject to a timeout?
{
"protoPayload": {
"@type": "type.googleapis.com/google.cloud.audit.AuditLog",
"authenticationInfo": {
"principalEmail": "service-492788363398@gcp-sa-cloudbatch.iam.gserviceaccount.com"
},
"requestMetadata": {
"callerIp": "private",
"callerSuppliedUserAgent": "google-api-go-client/0.5 boq-goa-gce/0.2 (\"boq_cloud-batch-backend-server_20221110.06_p0\")",
"requestAttributes": {
"time": "2022-11-16T10:41:18.546833Z",
"auth": {}
},
"destinationAttributes": {}
},
"serviceName": "compute.googleapis.com",
"methodName": "v1.compute.regionInstanceGroupManagers.delete",
"authorizationInfo": [
{
"permission": "compute.instanceGroupManagers.delete",
"granted": true,
"resourceAttributes": {
"service": "compute",
"name": "projects/moeyens-thor-dev/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
"type": "compute.instanceGroupManagers"
}
},
{
"permission": "compute.instanceGroups.delete",
"granted": true,
"resourceAttributes": {
"service": "compute",
"name": "projects/moeyens-thor-dev/regions/us-west1/instanceGroups/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
"type": "compute.instanceGroups"
}
}
],
"resourceName": "projects/492788363398/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
"request": {
"@type": "type.googleapis.com/compute.regionInstanceGroupManagers.delete",
"requestId": "116e9b6a-7b73-495f-bbb4-23ddb1821836"
},
"response": {
"@type": "type.googleapis.com/operation",
"targetLink": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
"name": "operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
"startTime": "2022-11-16T02:41:18.516-08:00",
"clientOperationId": "116e9b6a-7b73-495f-bbb4-23ddb1821836",
"id": "5870836799187255457",
"operationType": "compute.instanceGroupManagers.delete",
"selfLinkWithId": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/operations/5870836799187255457",
"region": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1",
"selfLink": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/operations/operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
"progress": "0",
"targetId": "3108924424131055714",
"user": "service-492788363398@gcp-sa-cloudbatch.iam.gserviceaccount.com",
"insertTime": "2022-11-16T02:41:18.502-08:00",
"status": "RUNNING"
},
"resourceLocation": {
"currentLocations": [
"us-west1"
]
}
},
"insertId": "r79yf0e4zmu6",
"resource": {
"type": "gce_instance_group_manager",
"labels": {
"location": "us-west1",
"instance_group_manager_name": "j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
"project_id": "moeyens-thor-dev",
"instance_group_manager_id": "3108924424131055714"
}
},
"timestamp": "2022-11-16T10:41:18.157199Z",
"severity": "NOTICE",
"logName": "projects/moeyens-thor-dev/logs/cloudaudit.googleapis.com%2Factivity",
"operation": {
"id": "operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
"producer": "compute.googleapis.com",
"first": true
},
"receiveTimestamp": "2022-11-16T10:41:18.559623638Z"
}
Solved! Go to Solution.
The job is listed as failed. Okay, I finally found the reason in the logs: exit status 137 (128 + 9, i.e. the process was killed with SIGKILL), which implies the job ran out of memory (OOM) and was killed.