Fetching and parsing data from email´s body to my google sheet.

Former Community Member
Not applicable

I'm working with a Google Sheets script that extracts data from email bodies. While the script successfully pastes data from most columns, it fails to paste the values from columns G and H. These columns contain text with hyperlinks to google drive.

Expected Behavior:

The script should copy and paste the complete value from columns G and H, including the text and the associated hyperlink.

Current Behavior:

The script doesn't even copy the plain text portion of the values in columns G and H, omitting the hyperlinks altogether.

Mail Body Example:

SUPPLIER: {cacho}
SERVICE_TYPE: {CD-001 Media buying (TV, press, Radio etc...)}
PROFORMA: {x}
INVOICE: {x}
UPLOADED_DATE: {15/07/2024}

In this example, 'x' represents the text with a hyperlink (To google drive) attached. The script should copy and paste the entire value, including 'x' and the hyperlink.


function processEmails() {
  try {
    var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();

    if (!sheet) {
      throw new Error('Active sheet not found');
    }
    Logger.log('Opened active spreadsheet successfully.');

    // Get unread messages from the "PRUEBAS" label
    var label = GmailApp.getUserLabelByName('PRUEBAS');
    if (!label) {
      throw new Error('Label "PRUEBAS" not found');
    }
    Logger.log('Retrieved label successfully.');

    var threads = label.getThreads();
    Logger.log('Found ' + threads.length + ' threads.');
    
    for (var i = 0; i < threads.length; i++) {
      var thread = threads[i];
      var messages = thread.getMessages();
      
      // Sort the thread messages by date, from oldest to most recent
      messages.sort(function(a, b) {
        return a.getDate() - b.getDate();
      });

      var combinedData = {};

      for (var j = 0; j < messages.length; j++) {
        var message = messages[j];

        // Only process unread messages
        if (!message.isUnread()) {
          continue;
        }

        var body = getMessageBody(message);
        if (!body) {
          continue;
        }

        Logger.log('Message body: ' + body);  // Log the entire message body

        var events = extractEvents(body, message);
        Logger.log('Extracted ' + events.length + ' events from message body.');
        events.forEach(function(data) {
          var key = data['EVENT_NAME'] + data['SUPPLIER'] + data['UPLOADED_DATE'];
          if (!combinedData[key]) {
            combinedData[key] = data;
            Logger.log('Added data for key: ' + key);
          } else {
            combinedData[key] = mergeData(combinedData[key], data);
            Logger.log('Merged data for key: ' + key);
          }
        });

        // Mark message as read after processing
        message.markRead();
        Logger.log('Marked message as read: ' + message.getSubject());
      }

      for (var key in combinedData) {
        var data = combinedData[key];
        var rowToUpdate = findRow(sheet, data);
        if (rowToUpdate === -1) {
          updateSheet(sheet, data);
          Logger.log('Appended new row for key: ' + key);
        } else {
          updateRow(sheet, rowToUpdate, data);
          Logger.log('Updated row ' + rowToUpdate + ' for key: ' + key);
        }
      }
    }
    
  } catch (e) {
    Logger.log('Error: ' + e.message);
  }
}

function getMessageBody(message) {
  var body = message.getPlainBody();
  Logger.log('Plain body: ' + body);  // Log the plain body
  if (!body) {
    body = message.getRawContent();
    Logger.log('Raw content: ' + body);  // Log the raw content
    if (body) {
      body = convertHtmlToPlainText(body);
      Logger.log('Converted HTML to plain text: ' + body);  // Log the converted plain text
    }
  }
  if (!body) {
    Logger.log('Message body is empty.');
    return null;
  }
  Logger.log('Retrieved message body.');
  return body;
}

function convertHtmlToPlainText(html) {
  var plainText = HtmlService.createHtmlOutput(html).getContent().replace(/<[^>]*>?/gm, '');
  Logger.log('Converted HTML to plain text: ' + plainText);
  return plainText;
}

function extractEvents(body, message) {
  var events = [];
  Logger.log('Starting to extract events from body');
  var sections = body.split(/SUPPLIER: \{/);
  Logger.log('Number of sections found: ' + sections.length);
  if (sections.length < 2) {
    Logger.log('No supplier sections found in the body.');
    return events;
  }

  let globalEventData = {};
  let firstSectionLines = sections[0].split('\n');
  firstSectionLines.forEach(function(line) {
    let parts = line.split(':');
    if (parts.length === 2) {
      let key = parts[0].trim().toUpperCase().replace(/ /g, '_');
      let match = parts[1].match(/\{\s*([^}]+)\s*\}/);
      if (match) {
        globalEventData[key] = match[1].trim();
      }
    }
  });

  let eventName = globalEventData['EVENT_NAME'];  // Get the EVENT_NAME once

  for (var i = 1; i < sections.length; i++) {
    let section = sections[i];
    let data = Object.assign({}, globalEventData);
    section = 'SUPPLIER: {' + section;  // Re-add the split string
    let lines = section.split('\n');
    lines.forEach(function(line) {
      let parts = line.split(':');
      if (parts.length === 2) {
        let key = parts[0].trim().toUpperCase().replace(/ /g, '_');
        let match = parts[1].match(/\{\s*([^}]+)\s*\}/);
        if (match) {
          let value = match[1].trim();
          if (key === 'UPLOADED_DATE' || key === 'DATE_OF_EXCHANGE' || key.startsWith('PAYMENT_DATE')) {
            value = parseDate(value);
          }
          data[key] = value;
        }
      }
    });
    data['EVENT_NAME'] = eventName;  // Apply the EVENT_NAME to each supplier
    if (data['SUPPLIER'] && data['UPLOADED_DATE']) {
      events.push(data);
    } else {
      Logger.log('Missing SUPPLIER or UPLOADED_DATE in section');
    }
  }

  Logger.log('Extracted ' + events.length + ' events from message body.');
  return events;
}

function mergeData(oldData, newData) {
  var merged = {};
  for (var key in oldData) {
    merged[key] = oldData[key];
  }
  for (var key in newData) {
    if (newData[key] && (!oldData[key] || newData[key] !== oldData[key])) {
      merged[key] = newData[key];
    }
  }
  Logger.log('Merged data.');
  return merged;
}

function findRow(sheet, data) {
  var range = sheet.getDataRange();
  var values = range.getValues();
  var targetDate = formatDate(new Date(data['UPLOADED_DATE'])); // Format date to dd/MM/yyyy
  
  for (var i = 1; i < values.length; i++) {
    if (values[i][1] === '') { // Stop checking if the row is empty
      break;
    }
    var eventName = values[i][1];
    var supplier = values[i][4];
    var uploadedDate = formatDate(new Date(values[i][9])); // Format date to dd/MM/yyyy
    Logger.log('Checking row ' + (i + 1) + ': EVENT_NAME=' + eventName + ', SUPPLIER=' + supplier + ', UPLOADED_DATE=' + uploadedDate);
    if (eventName === data['EVENT_NAME'] && supplier === data['SUPPLIER'] && uploadedDate === targetDate) {
      Logger.log('Found existing row for data at row ' + (i + 1) + '.');
      return i + 1;
    }
  }
  Logger.log('No existing row found for data: EVENT_NAME=' + data['EVENT_NAME'] + ', SUPPLIER=' + data['SUPPLIER'] + ', UPLOADED_DATE=' + targetDate);
  return -1;
}

function updateSheet(sheet, data) {
  var lastRow = sheet.getLastRow();
  var range = sheet.getRange(2, 2, lastRow, 1); // Check from column B onwards
  var values = range.getValues();
  
  for (var i = 0; i < values.length; i++) {
    if (values[i][0] === '') { // Check if column B is empty
      var row = sheet.getRange(i + 2, 2, 1, 26); // From column B to AA
      row.setValues([[
        data['EVENT_NAME'],                // B
        '',                                // C (keep empty)
        data['OFFER_STATUS'] || '',        // D
        data['SUPPLIER'],                  // E
        data['SERVICE_TYPE'] || '',        // F
        data['PROFORMA'] || '',            // G
        data['INVOICE'] || '',             // H
        data['COORDINATOR'],               // I
        formatDate(new Date(data['UPLOADED_DATE'])), // J (formatted to dd/MM/yyyy)
        data['EXCHANGE_ISO_A3_CODE'] || '',// K
        data['DATE_OF_EXCHANGE'] || '',    // L
        data['ORIGINAL_CURRENCY'] || '',   // M
        data['AMOUNT_IN_EUROS'] || '',     // N
        data['PAYMENT_REASON'] || '',      // O
        data['AMOUNT_EURO_1'] || '',       // P
        data['AMOUNT_ORIGINAL_CURRENCY_1'] || '', // Q
        data['PAYMENT_METHOD'] || '',      // R
        data['PAYMENT_LINK'] || '',        // S
        data['PAYMENT_DATE_1'] || '',      // T
        data['AMOUNT_EURO_2'] || '',       // U
        data['AMOUNT_ORIGINAL_CURRENCY_2'] || '', // V
        data['PAYMENT_METHOD_2'] || '',    // W
        data['PAYMENT_LINK_2'] || '',      // X
        data['PAYMENT_DATE_2'] || '',      // Y
        data['COMMENTS'] || '',            // Z
        ''                                 // AA (PAYMENT_STATUS)
      ]]);

      Logger.log('Added new row at position ' + (i + 2));
      return;
    }
  }
  
  // If no empty row is found, append to the end
  var appendedRow = sheet.appendRow([
    data['EVENT_NAME'],                // B
    '',                                // C (keep empty)
    data['OFFER_STATUS'] || '',        // D
    data['SUPPLIER'],                  // E
    data['SERVICE_TYPE'] || '',        // F
    data['PROFORMA'] || '',            // G
    data['INVOICE'] || '',             // H
    data['COORDINATOR'],               // I
    formatDate(new Date(data['UPLOADED_DATE'])), // J (formatted to dd/MM/yyyy)
    data['EXCHANGE_ISO_A3_CODE'] || '',// K
    data['DATE_OF_EXCHANGE'] || '',    // L
    data['ORIGINAL_CURRENCY'] || '',   // M
    data['AMOUNT_IN_EUROS'] || '',     // N
    data['PAYMENT_REASON'] || '',      // O
    data['AMOUNT_EURO_1'] || '',       // P
    data['AMOUNT_ORIGINAL_CURRENCY_1'] || '', // Q
    data['PAYMENT_METHOD'] || '',      // R
    data['PAYMENT_LINK'] || '',        // S
    data['PAYMENT_DATE_1'] || '',      // T
    data['AMOUNT_EURO_2'] || '',       // U
    data['AMOUNT_ORIGINAL_CURRENCY_2'] || '', // V
    data['PAYMENT_METHOD_2'] || '',    // W
    data['PAYMENT_LINK_2'] || '',      // X
    data['PAYMENT_DATE_2'] || '',      // Y
    data['COMMENTS'] || '',            // Z
    ''                                 // AA (PAYMENT_STATUS)
  ]);

  Logger.log('Appended new row to sheet.');
}

function updateRow(sheet, rowToUpdate, data) {
  var currentRow = sheet.getRange(rowToUpdate, 1, 1, sheet.getLastColumn()).getValues()[0];
  
  var row = sheet.getRange(rowToUpdate, 2, 1, 26);
  row.setValues([[
    data['EVENT_NAME'] || currentRow[1],        // B
    currentRow[2],                              // C (keep empty)
    data['OFFER_STATUS'] || currentRow[3],      // D
    data['SUPPLIER'] || currentRow[4],          // E
    data['SERVICE_TYPE'] || currentRow[5],      // F
    data['PROFORMA'] || currentRow[6],          // G
    data['INVOICE'] || currentRow[7],           // H
    data['COORDINATOR'] || currentRow[8],       // I
    formatDate(new Date(data['UPLOADED_DATE'])) || currentRow[9], // J (formatted to dd/MM/yyyy)
    data['EXCHANGE_ISO_A3_CODE'] || currentRow[10], // K
    data['DATE_OF_EXCHANGE'] || currentRow[11], // L
    data['ORIGINAL_CURRENCY'] || currentRow[12],// M
    data['AMOUNT_IN_EUROS'] || currentRow[13],  // N
    data['PAYMENT_REASON'] || currentRow[14],   // O
    data['AMOUNT_EURO_1'] || currentRow[15],    // P
    data['AMOUNT_ORIGINAL_CURRENCY_1'] || currentRow[16], // Q
    data['PAYMENT_METHOD'] || currentRow[17],   // R
    data['PAYMENT_LINK'] || currentRow[18],     // S
    data['PAYMENT_DATE_1'] || currentRow[19],   // T
    data['AMOUNT_EURO_2'] || currentRow[20],    // U
    data['AMOUNT_ORIGINAL_CURRENCY_2'] || currentRow[21], // V
    data['PAYMENT_METHOD_2'] || currentRow[22], // W
    data['PAYMENT_LINK_2'] || currentRow[23],   // X
    data['PAYMENT_DATE_2'] || currentRow[24],   // Y
    data['COMMENTS'] || currentRow[25],         // Z
    currentRow[26]                              // AA (PAYMENT_STATUS)
  ]]);

  Logger.log('Updated row ' + rowToUpdate + ' in sheet.');
}

function formatCurrencyColumns(sheet, rowIndex, data) {
  const currencyColumns = ['N', 'P', 'Q', 'U', 'V'];
  currencyColumns.forEach(col => {
    let cell = sheet.getRange(col + rowIndex);
    let value = data[col];
    if (!isNaN(parseFloat(value)) && isFinite(value) && value !== '') {
      cell.setValue(parseFloat(value));
      cell.setNumberFormat('#,##0.00"€"'); // Use this format for point as thousand separator and comma as decimal
    }
  });
}

function formatDate(date) {
  var day = ('0' + date.getDate()).slice(-2);
  var month = ('0' + (date.getMonth() + 1)).slice(-2);
  var year = date.getFullYear();
  return day + '/' + month + '/' + year;
}

function parseDate(input) {
  var formats = [
    {regex: /^\d{2}[-\/.]\d{2}[-\/.]\d{4}$/, format: 'dd/MM/yyyy'}, // European format
    {regex: /^\d{4}[-\/.]\d{2}[-\/.]\d{2}$/, format: 'yyyy/MM/dd'}, // American format
  ];

  for (var i = 0; i < formats.length; i++) {
    if (formats[i].regex.test(input)) {
      var parts = input.split(/[-\/.]/).map(part => part.trim());
      if (formats[i].format === 'dd/MM/yyyy') {
        return new Date(parts[2], parts[1] - 1, parts[0]);
      } else if (formats[i].format === 'yyyy/MM/dd') {
        return new Date(parts[0], parts[1] - 1, parts[2]);
      }
    }
  }
  
  Logger.log('Invalid date format for input: ' + input);
  return new Date('Invalid Date');
}

Execution Log:

0:52:01 Información Checking row 2: EVENT_NAME=PRUEBA, SUPPLIER=cacho, UPLOADED_DATE=15/07/2024
0:52:01 Información Found existing row for data at row 2.
0:52:02 Información Updated row 2 in sheet with PROFORMA: undefined, INVOICE: undefined
0:52:02 Información Updated row 2 for key: PRUEBAcachoMon Jul 15 2024 00:00:00 GMT+0200 (Central European Summer Time) with PROFORMA: undefined, INVOICE: undefined

Any help will be highly appreciated!!!!!!!!! 

0 0 172
0 REPLIES 0
Top Labels in this Space