【问题标题】:How to speed up search while parsing a CSV file in Jquery or Javascript?如何在 Jquery 或 Javascript 中解析 CSV 文件时加快搜索速度?
【发布时间】:2020-12-11 10:44:48
【问题描述】:

我有一个 CSV 文件,其中包含大约 40K 条目和 3 列 - 值、城市和类别

以下是结构:

Value    Class    City
111      lev 0    New York
112      lev 1    Winston
113      lev 2    Dakota
114      lev 2    Washington

class lev-0 是 level-1 的父级,它是 lev-2 的父级 (lev-0>lev-1>lev-2)

现在我有 3 个选择框父区域、子区域、子子区域

我为父区域选择框设置了一个 onchange 侦听器,它返回所选的值,然后在 CSV 数据中搜索该值;找到后,用 while 循环依次检查其下方的条目,把所有 class = lev-1 的条目加入子区域选择框的选项列表,直到遇到下一个 class = lev-0 的条目时循环停止。

问题是,一旦我选择了一个元素,选择框需要大约 3-4 秒才能被填充,而且很多时候它甚至会挂起浏览器。

我正在使用 Papa Parser 来解析 CSV

// Location of the CSV file (columns: Value, Class, City).
const csv = "https://example.com/locations.csv";

// Parsed rows; stays an empty array until the `complete` callback below runs.
let results = [];

// Download and parse the file with the first line used as field names.
// The rows are delivered to `complete`, which stores them in `results`.
const csvData = Papa.parse(csv, {
  download: true,
  header: true,
  complete(response) {
    results = response.data;
  },
});

以下是使用值作为输入获取类名和城市名的函数

/**
 * Returns the city of the first parsed row whose `Value` matches `inputVal`.
 *
 * @param {string|number} inputVal - Value to look for in `results`.
 * @returns {string|undefined} The row's `City`, or `undefined` when no row
 *   matches (the previous version threw a TypeError in that case).
 */
function findCityByName(inputVal) {
  // `==` is kept on purpose: it preserves the original coercing comparison,
  // so a numeric input still matches a value stored as a string.
  // `find` stops at the first hit instead of filtering the whole array.
  const match = results.find(data => data.Value == inputVal);
  return match ? match.City : undefined;
}
/**
 * Returns the class label of the first parsed row whose `Value` matches
 * `inputVal`.
 *
 * @param {string|number} inputVal - Value to look for in `results`.
 * @returns {string|undefined} The row's `Class`, or `undefined` when no row
 *   matches (the previous version threw a TypeError in that case).
 */
function findClassbyValue(inputVal) {
  // `==` is kept on purpose: it preserves the original coercing comparison,
  // so a numeric input still matches a value stored as a string.
  // `find` stops at the first hit instead of filtering the whole array.
  const match = results.find(data => data.Value == inputVal);
  return match ? match.Class : undefined;
}

以下是为选择框子区域创建选项值的代码

/**
 * Builds the `<option>` markup for the child-region select box.
 *
 * Starting at the row after the one whose `Value` matches `inputVal`, the
 * rows are walked in file order. Every row labelled `childClass` becomes one
 * `<option>`; rows with any other label are skipped; the walk ends at the
 * next row labelled `parentClass` or at the end of the data.
 *
 * The whole lookup is a single pass over `results`. The previous version
 * copied and rescanned the array on every step, and never advanced past a row
 * that was not a child row, which is what froze the browser.
 *
 * @param {string|number} inputVal - Value of the selected parent region.
 * @param {string} [parentClass='level-0'] - Class label that marks a parent
 *   row. NOTE(review): the sample data in the question shows labels such as
 *   "lev 0"; pass the labels actually used in the CSV if they differ.
 * @param {string} [childClass='level-1'] - Class label that marks a direct
 *   child row.
 * @returns {string} Concatenated `<option>` elements, one per line; an empty
 *   string when `inputVal` is not found or has no child rows.
 */
function GetChildRegions(inputVal, parentClass = 'level-0', childClass = 'level-1') {
  // `==` keeps the coercing comparison of the original lookup, so "111" and
  // 111 are treated as the same value.
  const parentIndex = results.findIndex(row => row.Value == inputVal);
  if (parentIndex === -1) {
    return '';
  }

  // Start from an empty string; an uninitialised accumulator would put the
  // text "undefined" in front of the first option.
  let regionDropOption = '';
  for (let i = parentIndex + 1; i < results.length; i++) {
    const row = results[i];
    if (row.Class === parentClass) {
      break; // reached the next parent region
    }
    if (row.Class === childClass) {
      regionDropOption += '<option value="' + row.Value + '">' + row.City + '</option>\n';
    }
  }
  return regionDropOption;
}

以下是在父区域更改事件时在子区域选择框中注入html代码的jquery

// When the parent-region select changes, rebuild the child-region options
// from the currently selected parent value.
jQuery("#Parent_region").change(function () {
  const chosenParent = jQuery('#Parent_region').find(":selected").val();
  const childOptionsHtml = GetChildRegions(chosenParent);
  jQuery("#child_region").html(childOptionsHtml);
});

如何让搜索更快?

编辑:

添加缓存(记忆)

Memoizer 函数

/**
 * Wraps a one-argument function so that a repeated call with the same
 * argument returns the stored result instead of calling `fun` again.
 *
 * A stored `undefined` or `null` does not count as a hit, so a function that
 * returns nothing (for example one that only starts asynchronous work) is
 * called again on every invocation.
 *
 * @param {Function} fun - Function of one argument to memoize.
 * @returns {Function} Memoized wrapper with the same single parameter.
 */
function memoizer(fun) {
  // Prototype-less object: inherited keys such as "constructor" must not be
  // mistaken for cached results.
  const cache = Object.create(null);
  return function (n) {
    if (cache[n] != null) {
      return cache[n];
    }
    const result = fun(n);
    cache[n] = result;
    return result;
  };
}

被记忆化(memoize)的函数:

// Worker that handles the most recent sub-region lookup, if any. Declared
// explicitly so the function below does not create an implicit global.
let subRegWorker;

/**
 * Starts a background lookup of the sub-regions for `selectedMainRegion`.
 *
 * The lookup is asynchronous: the worker's answer arrives later in its
 * message event and is written into `#_sub_region` there. This function
 * itself returns before that happens, so it always returns `undefined` and
 * `var hello = GetSubRegion(123)` cannot receive the markup.
 *
 * @param {*} selectedMainRegion - Value posted to `subRegWorker.js`.
 * @returns {undefined}
 */
function GetSubRegion(selectedMainRegion) {
  // Stop the worker of an earlier call before starting a new one.
  if (typeof subRegWorker !== "undefined") {
    subRegWorker.terminate();
  }

  const worker = new Worker("subRegWorker.js");
  subRegWorker = worker;
  worker.onmessage = function (e) {
    jQuery("#_sub_region").html(e.data);
    // One answer per worker: shut it down once the result is on the page.
    worker.terminate();
  };
  worker.postMessage(selectedMainRegion);
}

示例 - var hello = GetSubRegion(123); 这里 hello 的值是 undefined。怎么解决?

在变化时调用记忆函数

// Build the memoized lookup once. Creating it inside the handler would give
// every change event a brand-new, empty cache, so nothing was ever reused.
const getCacheSub = memoizer(GetSubRegion);

// Look up the sub-regions of the newly selected main region.
jQuery("#_main_region").change(function () {
  const inputMainRegion = jQuery('#_main_region').find(":selected").val();
  getCacheSub(inputMainRegion);
});

【问题讨论】:

    标签: javascript jquery json performance parsing


    【解决方案1】:

    JS 是单线程的。这个长循环会阻塞主线程(页面因此卡住)。你需要的是 Web Worker:Worker 在另一个线程中运行。例如:

    // main code
    // Feature-check Web Workers themselves; 'serviceWorker' in navigator tests
    // for Service Workers, which is a different API.
    if (typeof Worker !== "undefined") {
        const work = new Worker("http://localhost:5500/worker.js"); // the worker lives in its own js file
        // The event is named "message"; a misspelled name never fires.
        work.addEventListener("message", function (e) {
            console.log(e.data); // response of worker
        });
        work.postMessage("message"); // send any data
    }
    
    // worker file
    
    // Worker -> main thread: post a message to the page that created this worker.
    self.postMessage("message");

    // Main thread -> worker: log the payload of every message the page sends.
    self.addEventListener("message", ({ data }) => {
        console.log("Message from parent:", data);
    });
    

    如果您不想创建第二个文件,可以使用这个库: https://github.com/keithwhor/multithread.js

    自己翻译

    // worker
    /**
     * Worker-side lookup helper that remembers earlier answers, so a repeated
     * query does not rescan the parsed CSV rows.
     *
     * Constructing an instance immediately registers a "message" listener on
     * the worker's global scope (see talkWidthMain).
     */
    class csvParseOpti {
        constructor() {
            /**
             * Answers computed so far, one entry per looked-up input, e.g.
             * [{City: {inpVal: "Ankara", res: [...matching rows]}}]
             */
            this.caches = [];
            /**
             * Rows parsed by Papa, e.g.
             * [
             *   {City: "Ankara", Class: "1", Value: 111},
             *   {City: "Mersin", Class: "1", Value: 112},
             *   {City: "İzmir", Class: "0", Value: 113}
             * ]
             * Empty until runPapa() has delivered its result.
             */
            this.results = [];
            this.talkWidthMain();
        }

        /**
         * Starts downloading and parsing a CSV file. The rows are stored in
         * `this.results` once Papa invokes `complete`; until then the previous
         * rows stay in place.
         *
         * Nothing in this class calls runPapa yet; a message branch in
         * talkWidthMain still has to be written for it.
         *
         * @param {string} csv - URL of the CSV file.
         * @returns {boolean} Always `true`; it only signals that parsing was started.
         */
        runPapa(csv) {
            Papa.parse(csv, {
                header: true,
                download: true,
                worker: true,
                fastMode: true,
                complete: response => {
                    // Store on the instance; a local variable would be lost
                    // and findCityByName would have nothing to search.
                    this.results = response.data;
                    // Answers computed from the previous rows are stale now.
                    this.caches = [];
                }
            });
            return true;
        }

        /**
         * Listens for requests from the main thread and posts the answer back.
         */
        talkWidthMain() {
            self.addEventListener("message", (msg) => {
                // A request of the form {func: "city", value} is answered
                // with the result of findCityByName(value).
                if (msg.data && msg.data.func === "city") {
                    self.postMessage(this.findCityByName(msg.data.value));
                    // An id could be sent along with the reply so that answers
                    // to different requests cannot be mixed up.
                } // add matching branches here for the other lookup functions
            });
        }


        /**
         * Returns the rows whose `City` equals `inpVal`. The first lookup of a
         * given input scans `this.results` and stores the answer in
         * `this.caches`; later lookups of the same input return the stored
         * array. Apply the same pattern to the other lookup functions.
         *
         * Original author's note: once the cache holds as many entries as
         * there are rows it no longer saves work and could be reset.
         *
         * @param {string} inpVal - City name to search for.
         * @public
         * @return {Array<Object>} Matching rows (empty when none match).
         */
        findCityByName(inpVal) {
            // Entries created by other lookups may have no `City` part, so
            // check for it before comparing.
            const hit = this.caches.find(entry => entry.City && entry.City.inpVal == inpVal);
            if (hit) {
                return hit.City.res;
            }
            const rows = this.results.filter(data => data.City == inpVal);
            this.caches.push({City: {inpVal, res: rows}});
            return rows;
        }
    }
    
    // Create the instance; its constructor calls talkWidthMain(), which
    // registers the "message" listener for requests from the main thread.
    new csvParseOpti();
    // main 
    // Feature-check Web Workers themselves; 'serviceWorker' in navigator tests
    // for Service Workers, which is a different API.
    if (typeof Worker !== "undefined") {
        const work = new Worker("opt.js"); // the worker lives in its own js file
        work.addEventListener("message", function (e) {
            console.log(e.data); // response of worker (the payload, not the whole event)
        });
        work.postMessage({func: "city", value: "Mersin"}); // ask the worker for a city lookup
    }
    

    这只是缓存响应。

    【讨论】:

    • 感谢您的回复。工人解决了浏览器页面崩溃的问题,但搜索时间仍然是 2-3 秒。无论如何我可以加快搜索速度或任何方式来改进上述搜索循环以提高搜索速度?
    • 你能用 console.time 调试你的代码,看看哪个部分耗时最长吗?Papa Parse 支持 worker,并且有快速模式(fastMode)。请阅读文档:papaparse.com/docs
    • 我在 papa parse 中添加了 worker 和 fast 标签。我检查了 console.time 和 Papa Parse 只需要大约 150 毫秒来下载和解析文档。 while 循环本身平均需要 3000 毫秒才能完成。我将计时器放在循环内,每个循环周期平均需要 3 毫秒才能完成。有办法减少吗?
    • 你能把csv发给我吗
    • 你使用缓存了吗?
    【解决方案2】:

    感谢 @eay 指导我使用 Worker 和记忆化(memoization)

    缓存问题解决如下-

        // Markup already received per main-region value. It is shared on
        // purpose: the worker callback in GetSubRegion is expected to store
        // its answer here. Prototype-less, so inherited keys such as
        // "constructor" are never mistaken for cached entries.
        const cache = Object.create(null);

        /**
         * Wraps `fun` so that an argument whose answer is already in `cache`
         * is served from there: the cached markup is written to
         * `#_sub_region` and returned without calling `fun`.
         *
         * On a miss `fun(n)` is called. A synchronous result is stored; an
         * `undefined` result (asynchronous work) is not, so it cannot
         * overwrite an entry that the asynchronous callback stores later.
         *
         * @param {Function} fun - Function of one argument to memoize.
         * @returns {Function} Memoized wrapper with the same single parameter.
         */
        function memoizer(fun){
            return function (n){
                if (cache[n] != null) {
                    jQuery("#_sub_region").html(cache[n]);  // This was changed
                    return cache[n];
                }
                // The miss branch has to invoke the wrapped function;
                // without this call nothing is ever looked up.
                const result = fun(n);
                if (result !== undefined) {
                    cache[n] = result;
                }
                return result;
            };
        }
    
    
    // Worker that handles the most recent sub-region lookup, if any. Declared
    // explicitly so the function below does not create an implicit global.
    let subRegWorker;

    /**
     * Starts a background lookup of the sub-regions for `selectedMainRegion`.
     *
     * The answer arrives asynchronously in the worker's message event. There
     * it is stored in the shared `cache` under `selectedMainRegion` and
     * written into `#_sub_region`. The function itself returns `undefined`.
     *
     * @param {*} selectedMainRegion - Value posted to `subRegWorker.js`; also
     *   the key under which the answer is cached.
     * @returns {undefined}
     */
    function GetSubRegion(selectedMainRegion){
        // Stop the worker of an earlier call before starting a new one.
        if (typeof subRegWorker !== "undefined") {
            subRegWorker.terminate();
        }

        const worker = new Worker("subRegWorker.js");
        subRegWorker = worker;
        worker.onmessage = function(e) {
            const workData = e.data;
            // Key the cache by this call's own argument; `n` exists only
            // inside memoizer and is not defined here.
            cache[selectedMainRegion] = workData; // This was changed
            jQuery("#_sub_region").html(workData);
            // One answer per worker: shut it down once the result is stored.
            worker.terminate();
        };
        worker.postMessage(selectedMainRegion);
    }
    

    【讨论】:

      猜你喜欢
      • 2011-01-18
      • 2013-10-27
      • 2018-12-09
      • 2015-03-20
      • 2011-10-14
      • 2012-12-16
      • 1970-01-01
      • 2014-01-14
      • 1970-01-01
      相关资源
      最近更新 更多