Get hands-on experience with 20+ free Google Cloud products and $300 in free credit for new customers.

Google Batch instance group preemption or timeout

We've started using Google Batch to test some ETL processes. About 19 hours into a run, the job appeared to be shut down automatically. It's not clear to me what triggered this shutdown, other than the job running in a GCE Managed Instance Group. There's no documentation on this instance group and I cannot locate it in the console. Any suggestions on how to launch instances that are either not preemptible (if that's what happened) or not subject to a timeout?

 

{
  "protoPayload": {
    "@type": "type.googleapis.com/google.cloud.audit.AuditLog",
    "authenticationInfo": {
      "principalEmail": "service-492788363398@gcp-sa-cloudbatch.iam.gserviceaccount.com"
    },
    "requestMetadata": {
      "callerIp": "private",
      "callerSuppliedUserAgent": "google-api-go-client/0.5 boq-goa-gce/0.2 (\"boq_cloud-batch-backend-server_20221110.06_p0\")",
      "requestAttributes": {
        "time": "2022-11-16T10:41:18.546833Z",
        "auth": {}
      },
      "destinationAttributes": {}
    },
    "serviceName": "compute.googleapis.com",
    "methodName": "v1.compute.regionInstanceGroupManagers.delete",
    "authorizationInfo": [
      {
        "permission": "compute.instanceGroupManagers.delete",
        "granted": true,
        "resourceAttributes": {
          "service": "compute",
          "name": "projects/moeyens-thor-dev/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
          "type": "compute.instanceGroupManagers"
        }
      },
      {
        "permission": "compute.instanceGroups.delete",
        "granted": true,
        "resourceAttributes": {
          "service": "compute",
          "name": "projects/moeyens-thor-dev/regions/us-west1/instanceGroups/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
          "type": "compute.instanceGroups"
        }
      }
    ],
    "resourceName": "projects/492788363398/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
    "request": {
      "@type": "type.googleapis.com/compute.regionInstanceGroupManagers.delete",
      "requestId": "116e9b6a-7b73-495f-bbb4-23ddb1821836"
    },
    "response": {
      "@type": "type.googleapis.com/operation",
      "targetLink": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/instanceGroupManagers/j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
      "name": "operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
      "startTime": "2022-11-16T02:41:18.516-08:00",
      "clientOperationId": "116e9b6a-7b73-495f-bbb4-23ddb1821836",
      "id": "5870836799187255457",
      "operationType": "compute.instanceGroupManagers.delete",
      "selfLinkWithId": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/operations/5870836799187255457",
      "region": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1",
      "selfLink": "https://www.googleapis.com/compute/v1/projects/moeyens-thor-dev/regions/us-west1/operations/operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
      "progress": "0",
      "targetId": "3108924424131055714",
      "user": "service-492788363398@gcp-sa-cloudbatch.iam.gserviceaccount.com",
      "insertTime": "2022-11-16T02:41:18.502-08:00",
      "status": "RUNNING"
    },
    "resourceLocation": {
      "currentLocations": [
        "us-west1"
      ]
    }
  },
  "insertId": "r79yf0e4zmu6",
  "resource": {
    "type": "gce_instance_group_manager",
    "labels": {
      "location": "us-west1",
      "instance_group_manager_name": "j-dc1d5d7b-6a29-4616-b337-4efd3a6bfc2d-group0-0",
      "project_id": "moeyens-thor-dev",
      "instance_group_manager_id": "3108924424131055714"
    }
  },
  "timestamp": "2022-11-16T10:41:18.157199Z",
  "severity": "NOTICE",
  "logName": "projects/moeyens-thor-dev/logs/cloudaudit.googleapis.com%2Factivity",
  "operation": {
    "id": "operation-1668595278137-5ed941cd3ca76-0cdf30f0-a569ddce",
    "producer": "compute.googleapis.com",
    "first": true
  },
  "receiveTimestamp": "2022-11-16T10:41:18.559623638Z"
}

 

 

Solved
0 5 1,083
1 ACCEPTED SOLUTION

The job is listed as failed. Okay, I finally found the reason in the logs (exit status 137), which implies the job ran out of memory (OOM).

View solution in original post