比较两个电子表格并使用谷歌应用程序脚本输出差异

kur*_*ama 5 google-sheets google-apps-script

好吧,我正在尝试做标题中描述的内容.这两个电子表格只有一张我正在比较的表格.一个电子表格是另一个的更新,所以我只想获得新的内容.(如果它是fc(dos命令)之类的功能,这很容易......)

在做了一些搜索之后,我有了下面的脚本,它应该适用于大多数情况,它使用每个工作表的数组.

function test() {
  var Folder = DriveApp.getFoldersByName('theFolder').next();
  var FolderId =Folder.getId();
  //call old_spreadsheet
  var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";  
  var files = DriveApp.searchFiles(searchFor); 
  var old_file = files.next();   
  var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
  var old_sheet = old_spreadsheet.getSheets()[0];
  var old_sheetname = old_sheet.getName();
  var old_array = old_sheet.getDataRange().getValues();
  Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
  //call spreadsheet
  var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";  
  var files = DriveApp.searchFiles(searchFor); 
  var file = files.next();   
  var spreadsheet = SpreadsheetApp.openById(file.getId());
  var sheet = spreadsheet.getSheets()[0];
  var sheetname = sheet.getName();
  var array = sheet.getDataRange().getValues();
  Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);  
  var newarray = getNewData(array,old_array);
  Logger.log('there are ' + newarray.length + 'different rows');
}

function getNewData(array1,array2){    
  var diff =array2;   
  for (var i = 0; i<array1.length; i++){
    var duplicate = false;
    for (var j = 0;j<diff.length;j++){
      if (array1[i].join() == diff[j].join()){
        Logger.log('duplicated line found on rows ' + i + ':' + j);
        diff.splice(j,1);
        var duplicate= true;
        break;
      }    
    }
    if (duplicate==false) {
      Logger.log('not duplicated line found on row ' + i);
      diff.push(array1[i]);            
    }
  }
  return diff;
}
Run Code Online (Sandbox Code Playgroud)

问题是文件太大,差不多有30000行,因此脚本超出了5分钟的执行限制.

有没有办法改善这一点,例如,消除内部for循环?或者有部分方法可以做到这一点?比如前5000行,依此类推.

问候,

编辑:稍微分析电子表格后,我发现每行都有一个ID,所以现在我只能将搜索集中在每个电子表格的一列中.所以这是我的新实现:

function test(){
var Folder = DriveApp.getFoldersByName('theFolder').next();
  var FolderId =Folder.getId();
  //call old_spreadsheet
  var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";  
  var files = DriveApp.searchFiles(searchFor); 
  var old_file = files.next();   
  var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
  var old_sheet = old_spreadsheet.getSheets()[0];
  var old_sheetname = old_sheet.getName();
  var old_array = old_sheet.getDataRange().getValues();
  Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
  //call spreadsheet
  var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";  
  var files = DriveApp.searchFiles(searchFor); 
  var file = files.next();   
  var spreadsheet = SpreadsheetApp.openById(file.getId());
  var sheet = spreadsheet.getSheets()[0];
  var sheetname = sheet.getName();
  var array = sheet.getDataRange().getValues();
  Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length); 
  //The COlumn has an indicator, so i search for that. I don't control the formatting of the files, so i search in both spreadsheet for the indicator
  var searchString = 'NAME';
  for (var i = 0; i < old_array.length; i++) {    
    for (var j = 0; j < old_array[i].length; j++) {    
      if (old_array[i][j] == searchString) {
        var Row_old = i+1;
        var Column_old = j;
        break;
      }      
    }
    if (Row_old != undefined){
      break;
    }
  }
  for (var i = 0; i < array.length; i++) {    
    for (var j = 0; j < array[i].length; j++) {    
      if (array[i][j] == searchString) {
        var Row = i+1;
        var Column = j;
        break;
      }      
    }
    if (Row != undefined){
      break;
    }
  }

  Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);  

  var diff_index =[];
  var row_ind = 0;  
  for (var i=Row;i<array.length;i++){        
    Logger.log(i);
    var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
    if (existe==-1){      
      Logger.log(row_ind+'!!!');
      diff_index[row_ind]=i;
      row_ind++;          
    }
  }
  Logger.log(diff_index);
}
Run Code Online (Sandbox Code Playgroud)

这仍然没有时间......我现在将尝试合并你的评论.

kur*_*ama 0

最后,我决定选择缓存服务选项,这是代码,我正在测试它,看看我是否坚持这一点。

function getNewData() {
  //deleting triggers
  var triggers = ScriptApp.getProjectTriggers();    
  for (var i = 0; i < triggers.length; i++) {
    if (triggers[i].getHandlerFunction()=='getNewData'){
      ScriptApp.deleteTrigger(triggers[i]);
    }
  }  
  //max running time = 5.5 min
  var MAX_RUNNING_TIME = 330000;
  var startTime= (new Date()).getTime();
  //get cache
  var cache = CacheService.getUserCache();
  var downloaded =JSON.parse(cache.get('downloaded'));  
  var compared =JSON.parse(cache.get('compared'));
  //start
  if (downloaded==1 && compared!=1){
    //folder
    var Folder = DriveApp.getFoldersByName('theFolder').next();
    var FolderId = licitacionesFolder.getId();
    //call old_spreadsheet
    var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";  
    var files = DriveApp.searchFiles(searchFor); 
    var old_file = files.next();  
    var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
    var old_sheet = old_spreadsheet.getSheets()[0];
    var old_array = old_sheet.getDataRange().getValues();
    //call spreadsheet
    var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";  
    var files = DriveApp.searchFiles(searchFor); 
    var file = files.next();  
    var spreadsheet = SpreadsheetApp.openById(old_file.getId());
    var sheet = spreadsheet.getSheets()[0];
    var array = sheet.getDataRange().getValues();
    Logger.log(array.length+'::'+old_array.length); 
    // Column
    var searchString = 'NAME';
    var RC = getColumn(array,searchString);    
    var Row = RC.Row;
    var Column = RC.Column;
    var RC = getColumn(old_array,searchString);    
    var Row_old = RC.Row;
    var Column_old = RC.Column;    
    Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);      
    //compare 
    var diff_index =JSON.parse(cache.get('diff_index'));
    var row_ind =JSON.parse(cache.get('row_ind'));  
    var Roww =JSON.parse(cache.get('Row'));  
    if (diff_index==null){var diff_index = [];}
    if (row_ind==null){var row_ind = 0;}
    if (Roww==null){var Roww = Row;}    
    Logger.log(row_ind+'\n'+Roww);    
    for (var i=Roww;i<array.length;i++){  
      var currTime = (new Date()).getTime();
      if(currTime - startTime >= MAX_RUNNING_TIME){
        Logger.log((currTime - startTime)/(1000*60));
        Logger.log(i+'::'+row_ind);
        cache.putAll({'diff_index': JSON.stringify(diff_index),'row_ind': JSON.stringify(row_ind),'Row': JSON.stringify(i-1)},21600);   
        ScriptApp.newTrigger('getNewData').timeBased().after(2 * 60 * 1000).create();
        return;
      } else {
        Logger.log(i);
        var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
        if (existe==-1){      
          Logger.log(row_ind+'!!!');
          diff_index[row_ind]=i;
          row_ind++;          
        }
      }
    }    
    cache.putAll({'diff_index': JSON.stringify(diff_index),'Row': JSON.stringify(Row),'compared': JSON.stringify(1)},21600);
  } else {
    Logger.log('file not downloaded yet or already compared');
  }
}

function getColumn(array,searchString){
  for (var i = 0; i < array.length; i++) {    
    for (var j = 0; j < array[i].length; j++) {    
      if (array[i][j] == searchString) {
        var Row = i+1;
        var Column = j;
        break;
      }      
    }
    if (Row != undefined){
      break;
    }
  }
  return {Row: Row, Column: Column};
}
Run Code Online (Sandbox Code Playgroud)