【问题标题】:Improve performance in Google App Script Search functions提高 Google App 脚本搜索功能的性能
【发布时间】:2021-11-22 09:51:15
【问题描述】:

我已经使用 Google App Script(在 javascript 中)编写了一个脚本,并且我正在寻找一种方法来最好地优化基于一个或多个搜索字段返回对象的函数。数据存储在 Google 表格中。我的 UI 将参数传递给我的函数,然后我遍历给定的工作表以查找符合条件的行,并将单元格添加到要返回的对象中。返回的可能只是一个对象或对象列表。在大多数情况下,这很好用,但如果我将这种类型的函数嵌套在一个循环中,它真的会拖累性能。任何有关如何提高性能的建议将不胜感激。这是我的代码示例:

/**
 * Collects the active survey responses recorded for one organisation.
 *
 * Reads every row of the RESPONSE_PAGE sheet, skips the first (header) row and
 * converts each row whose org id (column index 2) loosely equals `org_id` and
 * whose active flag (column index 13) loosely equals true into a survey object.
 *
 * @param {*} org_id Organisation id compared against column index 2.
 * @param {Array<{id: *, value: *}>} reporting_periods Lookup list forwarded to
 *     GetReportingPeriodNameById_ to resolve the reporting period name.
 * @returns {Array<Object>} The matching surveys. The array itself also carries
 *     a `success` property set to true.
 */
function GetAllReportByOrgID_DataLayer_(org_id, reporting_periods) {
    var rows = GetDataRows_(DATA_SPREAD_SHEET_ID, RESPONSE_PAGE);
    var surveys = [];
    // Start at 1: row 0 is skipped as the header row.
    for (var i = 1; i < rows.length; i++) {
        var row = rows[i];
        var found_org_id = row[2];
        var found_is_active = row[13];
        if (found_org_id == org_id && found_is_active == true) {
            surveys.push({
                indicator_id: row[0],
                program_id: row[1],
                org_guid: row[2],
                survey_response: row[3],
                reporting_period: row[5],
                reporting_period_name: GetReportingPeriodNameById_(row[5], reporting_periods),
                is_final_report: row[6],
                is_submitted: row[7],
                submitted_by: row[8],
                submitted_by_email: row[9],
                date_created: ConvertUnixTimeStampToDateTime_(row[10]),
                date_updated: ConvertUnixTimeStampToDateTime_(row[11]),
                fiscal_year: row[12],
                // Look documents up by the program id (column index 1). The
                // previous code passed column index 13, which is the boolean
                // active flag tested above, not an id.
                // NOTE(review): this is one documents lookup per matching row;
                // if the lookup re-reads its sheet each time, that is the
                // dominant cost when this function runs inside a loop.
                documents: GetDocumentsById_DataLayer_({
                    program_id: row[1]
                })
            });
        }
    }
    surveys.success = true;
    return surveys;
}
/**
 * Returns the values of the full data range of one sheet.
 *
 * @param {string} Sheet_Id Spreadsheet id forwarded to GetSheet_.
 * @param {string} SheetName Sheet name forwarded to GetSheet_.
 * @returns {Array} Whatever getDataRange().getValues() returns for the sheet,
 *     or an empty array when GetSheet_ yields a falsy value.
 */
function GetDataRows_(Sheet_Id, SheetName) {
    const targetSheet = GetSheet_(Sheet_Id, SheetName);
    if (!targetSheet) {
        return [];
    }
    return targetSheet.getDataRange().getValues();
}
/**
 * Opens a spreadsheet by id and looks up one of its sheets by name.
 *
 * @param {string} Sheet_Id Id passed to SpreadsheetApp.openById.
 * @param {string} SheetName Name passed to getSheetByName.
 * @returns {*} The result of getSheetByName (callers in this file treat a
 *     falsy result as "sheet not available").
 */
function GetSheet_(Sheet_Id, SheetName) {
  return SpreadsheetApp.openById(Sheet_Id).getSheetByName(SheetName);
}
/**
 * Resolves the display value of a reporting period from its id.
 *
 * @param {*} id Reporting period id; a falsy id short-circuits to the
 *     "not found" text without touching `reporting_periods`.
 * @param {Array<{id: *, value: *}>} reporting_periods Candidate periods,
 *     scanned in order.
 * @returns {*} The `value` of the first entry whose `id` loosely equals `id`,
 *     otherwise the string "Reporting Period Not Found".
 */
function GetReportingPeriodNameById_(id, reporting_periods) {
    const notFound = "Reporting Period Not Found";
    if (!id) {
        return notFound;
    }
    for (const period of reporting_periods) {
        // Loose equality on purpose: ids may arrive as numbers or strings.
        if (period.id == id) {
            return period.value;
        }
    }
    return notFound;
}

/**
 * Returns the active documents attached to one program.
 *
 * A row is included when its active flag (column index 6) is strictly true and
 * its program id (column index 1) matches `data.program_id`. Both ids are
 * compared as trimmed strings, so numeric id cells and padded ids still match
 * and a non-string id no longer throws on `.trim()`.
 *
 * @param {{program_id: *}} data Search criteria; a missing `data` or
 *     `program_id` yields an empty result.
 * @param {Array<Array>} [rows] Optional pre-loaded values of the documents
 *     sheet, header row included. When omitted the sheet is read through
 *     GetDataRows_. Callers that invoke this in a loop can pass the rows in
 *     to avoid one sheet read per call.
 * @returns {Array<Object>} The matching documents. The array itself also
 *     carries a `success` property set to true.
 */
function GetDocumentsById_DataLayer_(data, rows) {
    var documents = [];
    documents.success = true;

    var raw_program_id = data ? data.program_id : undefined;
    if (raw_program_id === undefined || raw_program_id === null) {
        // Nothing to search for, so skip the sheet read entirely.
        return documents;
    }
    var program_id = String(raw_program_id).trim();

    var document_rows = rows || GetDataRows_(DATA_SPREAD_SHEET_ID, PROGAM_DOCUMENTS_PAGE);
    // Start at 1: row 0 is skipped as the header row.
    for (var i = 1; i < document_rows.length; i++) {
        var row = document_rows[i];
        if (row[6] !== true) {
            continue;
        }
        if (String(row[1]).trim() !== program_id) {
            continue;
        }
        documents.push({
            document_id: row[0],
            program_id: row[1],
            document_name: row[2],
            file_id: row[3],
            file_name: row[4],
            file_url: row[5],
            date_created: ConvertUnixTimeStampToDateTime_(row[7]),
            date_updated: ConvertUnixTimeStampToDateTime_(row[8])
        });
    }
    return documents;
}

/**
 * Formats a Unix timestamp (seconds since the epoch) as "M/D/YYYY H:M:S".
 *
 * The fields come from the Date object's local-time getters and are not
 * zero-padded.
 *
 * @param {number|string} unix_timestamp Seconds since the epoch; any falsy
 *     value (0, "", null, undefined) yields an empty string.
 * @returns {string} The formatted date-time, or "" for a falsy input.
 */
function ConvertUnixTimeStampToDateTime_(unix_timestamp) {
    if (!unix_timestamp) {
        return "";
    }
    var a = new Date(unix_timestamp * 1000);
    // getMonth() is zero-based (0 = January); add 1 to get the calendar month.
    var month = a.getMonth() + 1;
    var time = month + "/" + a.getDate() + "/" + a.getFullYear() + " " +
        a.getHours() + ":" + a.getMinutes() + ":" + a.getSeconds();
    return time;
}

这段代码大部分都可以正常工作,除非它在一个被调用 100 次左右的循环中,然后事情会滞后并且可能需要一分钟或更长时间来处理。工作表并没有那么大,不到 200 行和 15 列。
谢谢

【问题讨论】:

  • 你的问题太笼统了。通常的做法是先阅读并落实最佳实践,如果之后还需要其他帮助,再回来提问。您似乎有好几个辅助函数,为了节省运行时间,我会把它们合并到一个函数中。
  • ConvertUnixTimeStampToDateTime_(row[7]); 未定义
  • @Cooper 我不认为值得投入,因为它只是转换 Unix 时间。我可以添加它。但更大的问题似乎是嵌套调用。
  • 无论你提供什么都应该满足minimal reproducible example的要求。我会说您应该删除所有辅助函数,尤其是当它们调用其他函数以最小化运行时。
  • 您是否在使用 HTML 页面并在应用程序脚本中调用服务器端函数?关于这个function nested in a loop it can really drag the performance 是你客户端的循环吗?正如@Cooper 所提到的,如果您能提供minimal reproducible example 以供我们验证可能提出的解决方案是否会提高您当前的性能,我将不胜感激。我基本上很好奇嵌套循环发生在哪里

标签: javascript jquery performance google-apps-script google-sheets


【解决方案1】:

性能下降的原因是 GetDataRows_ 函数,因为它会反复调用 SpreadsheetApp.openById() 和 ss.getSheetByName(SheetName)。您可以尝试使用全局 Map 来缓存这些对象。例如,使用一个 Spreadsheet 映射:

// Script-wide cache of opened spreadsheets; the getter snippet below stores
// the result of SpreadsheetApp.openById() in it, keyed by spreadsheet id.
const globalSSMap = new Map();// global Spreadsheet map

然后你的 getter 函数可以被重写。例如,要获取ss,请使用

// Open the spreadsheet only the first time this id is requested; later
// lookups reuse the cached object. The key must be spelled `Sheet_Id`,
// matching the identifier passed to openById (the previous `Sheet_id`
// spelling referred to a different, undefined name).
if (!globalSSMap.has(Sheet_Id)) {
  globalSSMap.set(Sheet_Id, SpreadsheetApp.openById(Sheet_Id));
}
var ss = globalSSMap.get(Sheet_Id);

【讨论】:

  • 感谢您的建议。我实施了您建议的更改。它有助于剃掉几秒钟,但仅此而已。尽管如此,每一点都有帮助。谢谢
  • @jason 这只是一个示例策略。您需要为 sheet 和 rows 实现相同的缓存,才能获得大幅度的改进。
【解决方案2】:

你可以参考这个示例代码:

Data_Layer.gs

/**
 * Builds the list of partners visible to the logged-in user, each with its
 * programs, narrative notes and reports attached.
 *
 * A partner is visible when either
 *   - the PROGRAM_MANAGERS sheet has a row for that partner whose email
 *     (column index 2) loosely equals `loggedin_user` and whose active flag
 *     (column index 10) loosely equals true, or
 *   - the partner row's manager email (column index 5) equals `loggedin_user`
 *     ignoring case.
 *
 * @param {string} loggedin_user Email address of the current user.
 * @returns {Array<Object>} One object per visible partner row, in sheet order.
 */
function GetAllDataByManager_DataLayer_(loggedin_user) {
  var ss = SpreadsheetApp.openById(DATA_SPREAD_SHEET_ID);

  // Read every sheet once and index the values by sheet name, so the helper
  // functions called below work on in-memory arrays instead of re-reading
  // the spreadsheet.
  var sheetData = {};
  ss.getSheets().forEach(sheet => {
    sheetData[sheet.getName()] = sheet.getDataRange().getValues();
  });

  // Partner ids for which the user is listed as an active additional manager.
  // A Set keeps the membership test in the partner loop O(1).
  var ip_ids = new Set();
  var program_rows = sheetData[PROGRAM_MANAGERS];
  // Start at 1: row 0 is skipped as the header row.
  for (var i = 1; i < program_rows.length; i++) {
    var row = program_rows[i];
    var found_admin_email = row[2];
    var found_is_active = row[10];
    if (found_admin_email == loggedin_user && found_is_active == true) {
      ip_ids.add(row[1]);
    }
  }

  var Company_data = [];
  var partner_rows = sheetData[PARTNER_PAGE];
  for (var j = 1; j < partner_rows.length; j++) {
    var partner_row = partner_rows[j];
    var found_partner_id = partner_row[0];
    // The primary-manager email is only compared when the partner was not
    // already matched through the managers sheet.
    var add_record = ip_ids.has(found_partner_id) ||
      loggedin_user.toUpperCase() === partner_row[5].toUpperCase();
    if (!add_record) {
      continue;
    }
    Company_data.push({
      ip_id: partner_row[0],
      ip_name: partner_row[1],
      ip_poc_name: partner_row[2],
      ip_poc_email: partner_row[3],
      manager_name: partner_row[4],
      manager_email: partner_row[5],
      is_active: partner_row[6],
      date_created: partner_row[7],
      created_by: partner_row[8],
      partner_programs: GetAllProgramDataByIP_DataLayer_(sheetData, found_partner_id),
      partner_notes: GetProgramNarrativesByPartnerID_DataLayer_(sheetData, found_partner_id),
      partner_reports: GetAllReportByPartnerID_DataLayer_(sheetData, found_partner_id)
    });
  }

  return Company_data;
}
/**
 * Lists the programs that belong to one partner.
 *
 * @param {Object} sheetData Sheet values indexed by sheet name; the rows of
 *     `sheetData[PROGRAM_PAGE]` are scanned, skipping the first row.
 * @param {*} ip_id Partner id, compared strictly against column index 1.
 * @returns {Array<Object>} One program object per matching row; text columns
 *     are converted with toString() and each object has `success` set to true.
 */
function GetAllProgramDataByIP_DataLayer_(sheetData, ip_id) {
    const programRows = sheetData[PROGRAM_PAGE];
    const matches = [];
    for (let idx = 1; idx < programRows.length; idx += 1) {
        const cells = programRows[idx];
        if (cells[1] !== ip_id) {
            continue;
        }
        matches.push({
            program_id: cells[0].toString(),
            partner_id: cells[1].toString(),
            program_name: cells[2].toString(),
            program_country: cells[3].toString(),
            program_background: cells[4].toString(),
            program_objectives: cells[5].toString(),
            program_justification: cells[6].toString(),
            program_start_date: cells[7].toString(),
            program_status: cells[8].toString(),
            program_contract_number: cells[9].toString(),
            is_active: cells[10],
            date_created: ConvertUnixTimeStampToDateTime_(cells[11].toString()),
            date_updated: ConvertUnixTimeStampToDateTime_(cells[12].toString()),
            success: true
        });
    }
    return matches;
}
/**
 * Collects the active narrative notes recorded for one partner.
 *
 * @param {Object} sheetData Sheet values indexed by sheet name; the rows of
 *     `sheetData[PROGRAM_NARRATIVE_NOTE]` are scanned, skipping the first row.
 * @param {*} partner_id Partner id, compared strictly against column index 2.
 * @returns {{success: boolean, programs_notes: Array<Object>}} `success` is
 *     always true; `programs_notes` holds one object per row whose active
 *     flag (column index 7) is strictly true and whose partner id matches.
 */
function GetProgramNarrativesByPartnerID_DataLayer_(sheetData, partner_id) {
    const noteRows = sheetData[PROGRAM_NARRATIVE_NOTE];
    const collected = [];
    for (let idx = 1; idx < noteRows.length; idx += 1) {
        const cells = noteRows[idx];
        const isWanted = cells[7] === true && cells[2] === partner_id;
        if (!isWanted) {
            continue;
        }
        collected.push({
            note_id: cells[0],
            program_id: cells[1],
            company_guid: cells[2],
            // Title and description pass through htmlEscape_ before being returned.
            note_title: htmlEscape_(cells[3]),
            note_desc: htmlEscape_(cells[4]),
            note_reportingPeriod: cells[5],
            activity_theme: cells[6],
            date_created: ConvertUnixTimeStampToDateTime_(cells[8]),
            date_updated: ConvertUnixTimeStampToDateTime_(cells[9])
        });
    }
    return {
        success: true,
        programs_notes: collected
    };
}
/**
 * Collects the survey responses recorded for one partner.
 *
 * @param {Object} sheetData Sheet values indexed by sheet name; the rows of
 *     `sheetData[RESPONSE_PAGE]` are scanned, skipping the first row.
 * @param {*} partner_id Partner id, compared loosely against column index 2.
 * @returns {Array<Object>} One survey object per matching row. The array
 *     itself also carries a `success` property set to true.
 */
function GetAllReportByPartnerID_DataLayer_(sheetData, partner_id) {
    const responseRows = sheetData[RESPONSE_PAGE];
    const collected = [];
    let idx = 1;
    while (idx < responseRows.length) {
        const cells = responseRows[idx];
        idx += 1;
        // Loose equality is kept so numeric and string ids still match.
        if (!(cells[2] == partner_id)) {
            continue;
        }
        collected.push({
            indicator_id: cells[0],
            program_id: cells[1],
            company_guid: cells[2],
            survey_response: cells[3],
            reporting_period: cells[5],
            is_final_report: cells[6],
            is_submitted: cells[7],
            submitted_by: cells[8],
            submitted_by_email: cells[9],
            date_created: ConvertUnixTimeStampToDateTime_(cells[10]),
            date_updated: ConvertUnixTimeStampToDateTime_(cells[11]),
            fiscal_year: cells[12]
        });
    }
    collected.success = true;
    return collected;
}

Helper_Functions.gs

/**
 * Formats a Unix timestamp (seconds since the epoch) as "M/D/YYYY H:M:S".
 *
 * The fields come from the Date object's local-time getters and are not
 * zero-padded.
 *
 * @param {number|string} unix_timestamp Seconds since the epoch; any falsy
 *     value (0, "", null, undefined) yields an empty string.
 * @returns {string} The formatted date-time, or "" for a falsy input.
 */
function ConvertUnixTimeStampToDateTime_(unix_timestamp) {
    if (!unix_timestamp) {
        return "";
    }
    var a = new Date(unix_timestamp * 1000);
    // getMonth() is zero-based (0 = January); add 1 to get the calendar month.
    var month = a.getMonth() + 1;
    var time = month + "/" + a.getDate() + "/" + a.getFullYear() + " " +
        a.getHours() + ":" + a.getMinutes() + ":" + a.getSeconds();
    return time;
}

/**
 * Produces a new identifier by delegating to Utilities.getUuid().
 *
 * @returns {*} Whatever Utilities.getUuid() returns.
 */
function CreateGUID_() {
    const guid = Utilities.getUuid();
    return guid;
}
/**
 * Converts a value to text and removes anything that looks like an HTML tag.
 *
 * Despite the name, nothing is escaped: every `<...>` sequence is deleted and
 * the remaining text is returned unchanged.
 * NOTE(review): regex tag stripping is not a complete HTML sanitiser; if the
 * result is inserted into a page as markup, escape it at render time as well.
 *
 * @param {*} str Value to clean. null and undefined yield "" (previously they
 *     threw on `.toString()`); other values are converted with String().
 * @returns {string} The text with all `<...>` sequences removed.
 */
function htmlEscape_(str) {
    if (str === null || str === undefined) {
        return "";
    }
    return String(str).replace(/<[^>]+>/g, "");
}

注意:

  • 我没有在共享脚本中看到 GetDocumentsById_DataLayer_ 和 survey.documents
  • 我只是在您的主函数 GetAllDataByManager_DataLayer_() 中寻找可以改进的地方,当您检查 partner_rows 变量时会出现嵌套循环。
  • 对于您的嵌套循环检查,我认为我们无能为力了。

修改完成:

  • 尽管您避免了重复调用 SpreadsheetApp.openById(),但在获取工作表对象时仍然会逐个进行调用,因为您使用 ss.getSheetByName(SheetName) 按名称获取单个工作表对象,而且每次循环都要重新读取 3 个不同工作表的值
  • 我所做的是使用getSheets() 获取电子表格中可用的所有工作表对象,获取其数据范围值,然后根据工作表名称映射它们。正如@TheMaster 在另一个答案中所建议的那样
  • 删除Helper_Functions.gs中不必要的功能

执行日志:

  • 有关代码更改,请参阅最近的 2 个 Web 应用程序执行日志。将其与第一次执行进行比较

【讨论】:

  • 如果您在实施过程中遇到一些问题,请告诉我,以便我可以相应地更新答案
猜你喜欢
  • 1970-01-01
  • 2013-09-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2018-12-10
  • 2010-11-09
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多