【问题标题】:dc.js series chart - chart is too slow when filling missing data(dc.js 系列图表 - 填充缺失数据时图表太慢)
【发布时间】:2020-03-09 19:38:58
【问题描述】:

我想创建一个多时间线图作为系列图。

我阅读了有关填充缺失数据的 Stack Overflow dc.js lineChart - fill missing dates and show zero where no data

问题:我在那里实现了代码,它非常适合单线图。对于系列图表,我需要对其进行一些调整。它可以工作,但是性能很糟糕。

这是我们使用的示例数据:

// Sample records used by the question: one object per report, holding a store
// name, a "latitude longitude" string, and a numeric timestamp (presumably
// epoch milliseconds — confirm against whatever produces this data).
// The elements are comma-separated so the literal parses as valid JavaScript.
let data = [
  {description: "Walmart", location: "40.216403 -74.541296", timeReported: 1581710670184},
  {description: "Target", location: "38.271996 -84.032575", timeReported: 1583524065011},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1583524065011},
  {description: "7-11", location: "34.925349 -78.463977", timeReported: 1583524065011},
  {description: "WaWa", location: "35.716208 -77.741230", timeReported: 1583524065013},
  {description: "7-11", location: "41.258950 -83.888060", timeReported: 1583524065013},
  {description: "Shell", location: "37.879694 -79.836127", timeReported: 1583524065011},
  {description: "Dominos", location: "35.890273 -80.700329", timeReported: 1583524065395},
  {description: "Dominos", location: "39.268777 -78.743366", timeReported: 1583524065397},
  {description: "Walgreens", location: "35.490215 -81.773863", timeReported: 1583524065399},
  {description: "7-11", location: "37.974797 -81.393449", timeReported: 1583524065506},
  {description: "Wendys", location: "40.859685 -76.963065", timeReported: 1583524065521},
  {description: "CVS", location: "38.517910 -78.251419", timeReported: 1583524065553},
  {description: "CVS", location: "35.947033 -81.616061", timeReported: 1583524142169},
  {description: "Shell", location: "39.566535 -77.992499", timeReported: 1583524142176},
  {description: "Target", location: "37.832142 -88.003151", timeReported: 1583524142170},
  {description: "Wendys", location: "40.245397 -80.061998", timeReported: 1583524142223},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1583524142223},
  {description: "Macys", location: "36.631458 -77.803286", timeReported: 1583524142213},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1583524142251},
  {description: "7-11", location: "41.138285 -83.298142", timeReported: 1583524142249},
  {description: "Wendys", location: "34.940485 -77.230388", timeReported: 1583524142249},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1583524142296},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1583524142293},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1583524142310},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1583524142338},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1582058735883},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1582058735883},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1582058735883},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1582058735883},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1582058735883},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1582058735883},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1582058735883},
  {description: "Kohls", location: "40.373533 -101.057470", timeReported: 1582838559493},
];

这里是示例代码。顺便说一句,下面代码中的 curTimeInterval 只是某个 d3 时间间隔(time interval)的别名,可以由用户选择(d3.timeHour、d3.timeDay、d3.timeWeek、d3.timeMonth)。

// Index the records with crossfilter.
cf = crossfilter(data);

// One bin per time interval; every record contributes 1, so each bin's value
// is the number of reports in that interval.
// NOTE(review): d.timeReportedDate is not a field of the sample records shown
// (they carry a numeric timeReported) — presumably a Date derived elsewhere; confirm.
dateDim = cf.dimension((d) => curTimeInterval(d.timeReportedDate));
reportedGroup = dateDim.group().reduceSum(() => 1);

// Earliest and latest bin keys, each widened by two intervals; these become
// the x-axis domain of the chart.
let minDate = curTimeInterval.offset(
  d3.min(reportedGroup.all(), (bin) => bin.key),
  -2
);
let maxDate = curTimeInterval.offset(
  d3.max(reportedGroup.all(), (bin) => bin.key),
  2
);

// Composite [description, interval start] key: one series per description.
const runDimension = cf.dimension((d) => [
  d.description,
  curTimeInterval(d.timeReportedDate),
]);

const runGroup = runDimension.group();

// Wrap the group so the intervals with no data are filled with zeros.
const filledSeries = fill_composite_intervals(runGroup, curTimeInterval);

// Series chart rendered into '#series': one line per description, x = time.
const seriesChart = new dc.SeriesChart('#series');
seriesChart
  .width(768)
  .height(480)
  // each series is drawn as a cardinal-spline line chart
  .chart((c) => new dc.LineChart(c).curve(d3.curveCardinal))
  .x(d3.scaleTime().domain([minDate, maxDate]))
  .xUnits(curTimeInterval.range)
  .brushOn(false)
  .clipPadding(10)
  .elasticY(true)
  .dimension(runDimension)
  .group(filledSeries)
  .mouseZoomable(true)
  // group keys are [description, time]: the first part selects the series,
  // the second is the x value
  .seriesAccessor((d) => d.key[0])
  .keyAccessor((d) => d.key[1])
  .valueAccessor((d) => d.value)
  .legend(
    dc.legend()
      .x(350)
      .y(350)
      .itemHeight(13)
      .gap(5)
      .horizontal(1)
      .legendWidth(140)
      .itemWidth(70)
  )
  // from here on the chain operates on the value returned by yAxis()
  .yAxis()
  // NOTE(review): min and max are not defined in this snippet — presumably the
  // smallest and largest y values computed elsewhere; confirm.
  .tickValues(d3.range(min > 0 ? min - 1 : min, max + 1));

// Widen the left margin by 40.
seriesChart.margins().left += 40;


/**
 * Wraps a group whose keys are [category, Date] pairs in a "fake group" whose
 * all() also reports a zero for every interval in which a category has no data.
 *
 * Declared as a function (rather than assigned to an undeclared variable) so
 * that it is hoisted and can be called by code that appears earlier in the
 * script, and so that it does not create an implicit global.
 *
 * @param {{all: function(): Array<{key: [*, Date], value: number}>}} group
 *   source group; only its all() method is used
 * @param {{range: function(Date, Date): Date[]}} interval
 *   d3-style time interval; only its range(start, stop) method is used
 * @returns {{all: function(): Array<{key: [*, Date], value: number}>}}
 *   object with an all() method that returns the filled rows, grouped by
 *   category (in order of each category's first appearance by time) and
 *   ascending by time within a category
 */
function fill_composite_intervals(group, interval) {
  return {
    all() {
      const source = group.all();
      if (!source.length) {
        return [];
      }

      // Sort a copy: sorting the array returned by group.all() in place would
      // reorder the caller's data (for a crossfilter group this is presumably
      // its own live result array — see the crossfilter docs).
      const sorted = [...source].sort(
        (a, b) => a.key[1].getTime() - b.key[1].getTime()
      );

      // Every interval start between the earliest and the latest data point.
      const target = interval.range(
        sorted[0].key[1],
        sorted[sorted.length - 1].key[1]
      );

      // Split the rows per category, keeping the time order.
      const byCategory = new Map();
      for (const {key: [category, time], value} of sorted) {
        let points = byCategory.get(category);
        if (!points) {
          points = [];
          byCategory.set(category, points);
        }
        points.push({key: new Date(time), value});
      }

      // Merge each category's points with the interval starts: real points
      // are kept, interval starts without a point become zeros.
      const filled = [];
      for (const [category, points] of byCategory) {
        let oi = 0;
        let ti = 0;
        while (oi < points.length && ti < target.length) {
          const point = points[oi];
          const slotTime = target[ti].getTime();
          if (point.key.getTime() <= slotTime) {
            filled.push({key: [category, point.key], value: point.value});
            oi += 1;
            if (point.key.getTime() === slotTime) {
              ti += 1;
            }
          } else {
            filled.push({key: [category, target[ti]], value: 0});
            ti += 1;
          }
        }
        // Points that lie after the last interval start (d3's interval.range
        // excludes its stop date, so the latest points end up here).
        for (; oi < points.length; oi += 1) {
          filled.push({key: [category, points[oi].key], value: points[oi].value});
        }
        // Interval starts after this category's last point.
        for (; ti < target.length; ti += 1) {
          filled.push({key: [category, target[ti]], value: 0});
        }
      }
      return filled;
    },
  };
}

【问题讨论】:

  • 你没有说你使用这个数据的间隔。我写了an example of choosing the appropriate interval,它演示了使用太细的间隔的问题。对于此数据,timeDay 产生 240 个 bin,timeHour 产生 5542 个 bin,timeMinute 产生 332482 个 bin。没有理由显示比像素宽度更多的点;我还在这个演示中关闭了xyTipsOn,因为每个点都被隐藏了。希望尽快跟进一个彻底的答案。

标签: d3.js dc.js crossfilter


【解决方案1】:

我从创建a fiddle 开始,它说明了问题。这里有趣的是一个选择菜单,它显示了哪些时间间隔适合图表的数据和缩放级别(域)。

显示超过 width/2 点是不合适的(因为它们不会被渲染),并且显示少于两个点也不合适,所以“不合适”的选项是灰色的斜体:

它使用一个对象将区间名称映射到对应d3区间的毫秒数:

// Nominal length in milliseconds of each time interval, keyed by the name of
// the corresponding interval on the d3 object. Entries run from finest to
// coarsest; a year is counted as 365 days and a month as one twelfth of that.
const intervals = {
  timeSecond: 1000,
  timeMinute: 60 * 1000,
  timeHour: 60 * 60 * 1000,
  timeDay: 24 * 60 * 60 * 1000,
  timeWeek: 7 * 24 * 60 * 60 * 1000,
  timeMonth: (365 * 24 * 60 * 60 * 1000) / 12,
  timeYear: 365 * 24 * 60 * 60 * 1000,
};

allowed_intervals 确定第一个和最后一个适当的间隔:

/**
 * Determines the finest and the coarsest interval that make sense for a chart
 * showing the given date domain.
 *
 * The finest acceptable interval is the first entry of `intervals` that yields
 * fewer than chart.width() / 2 bins over the domain. The coarsest acceptable
 * interval is the last entry for which the two ends of the domain floor to
 * different dates, i.e. the domain spans at least one boundary.
 *
 * @param {{width: function(): number}} chart - chart whose width limits the bin count
 * @param {Object<string, number>} intervals - interval name -> length in ms,
 *   ordered from finest to coarsest; each name must be a function on `d3`
 * @param {[Date, Date]} dateDomain - start and end of the displayed range
 * @returns {[string, string]} names of the finest and coarsest suitable intervals
 * @throws {Error} 'date range too long' when even the coarsest interval gives too many bins
 * @throws {Error} 'date range too short' when both ends fall into the same bin
 *   of every interval (previously this surfaced as a TypeError on `last[0]`)
 */
function allowed_intervals(chart, intervals, dateDomain) {
  const [start, end] = dateDomain;
  const dt = end.getTime() - start.getTime();
  const entries = Object.entries(intervals);

  const first = entries.find(([, ms]) => dt / ms < chart.width() / 2);
  if (!first) {
    throw new Error('date range too long');
  }

  // Search from the coarsest interval downwards; copy first because
  // reverse() works in place.
  const last = [...entries]
    .reverse()
    .find(([iname]) => d3[iname](start).getTime() !== d3[iname](end).getTime());
  if (!last) {
    throw new Error('date range too short');
  }

  return [first[0], last[0]];
}

所以这一切都很好。该示例打印了结果数据,我们可以看到,如果我们用d3.timeMinute 填充示例数据,它会从原始的 15 个数据点生成 332482 个数据点。这显然是太多的数据,尤其是对于一个简单的示例。

这是一个很好的算法,可以找到合适的 d3 时间间隔。然而,当我们启用缩放时它会失败,因为现在我们可以放大到一个小时,比如说,timeMinute 是合适的,但是如果你对所有数据使用那个间隔,它的点太多了,图表会减慢到停下来。

所以我开始思考如何让它更高效。我们实际上不需要填充每一个缺失的时间间隔。我们真正需要的是确保捕捉到下降沿(数据从非零变为零时)和上升沿(数据从零变为非零时)。只需要在这些位置向输入数据添加零即可。

这是fill_composite_intervals 的新版本,它使用上升沿和下降沿,只添加显示这些沿所需的零:

/**
 * Wraps a group keyed by [category, time] in a "fake group" that adds zeros
 * only where a category's line has to drop to, or rise from, zero.
 *
 * @param {{all: function(): Array<{key: [*, Date], value: number}>}} group
 *   source group; only all() is used and its array is copied, not modified
 * @param {{range: function(Date, Date): Date[], offset: function(Date, number): Date}} interval
 *   d3-style time interval
 * @returns {{all: function(): Array<{key: [*, Date], value: number}>}}
 *   same row shape as the input, with the extra zero rows included
 */
function fill_composite_intervals(group, interval) {
  const zeroAt = (time) => ({key: time, value: 0});

  // Merges one category's points (ascending by time) with the shared list of
  // interval starts, emitting a zero only at the edges of each gap.
  const padEdges = (points, slots) => {
    const padded = [];
    if (!points.length) {
      return padded;
    }
    let p = 0;
    let s = 0;
    let justZeroed = false; // the previous slot of the current gap got a zero
    let owesRisingZero = false; // at least one later slot of the gap was skipped
    while (p < points.length && s < slots.length) {
      if (points[p].key <= slots[s]) {
        if (owesRisingZero) {
          // zeros were skipped inside the gap, so put one right before this
          // point (rising edge)
          padded.push(zeroAt(slots[s - 1]));
          owesRisingZero = false;
        }
        const point = points[p];
        padded.push(point);
        p += 1;
        if (point.key.getTime() === slots[s].getTime()) {
          s += 1;
        }
        justZeroed = false;
      } else {
        if (justZeroed) {
          owesRisingZero = true;
        } else {
          // first empty slot after a value (falling edge)
          padded.push(zeroAt(slots[s]));
          justZeroed = true;
        }
        s += 1;
      }
    }
    if (p < points.length) {
      padded.push(...points.slice(p));
    }
    if (s < slots.length) {
      // a single trailing zero after the last point
      padded.push(zeroAt(slots[s]));
    }
    return padded;
  };

  return {
    all() {
      const rows = [...group.all()];
      if (rows.length === 0) {
        return [];
      }
      // the merge in padEdges needs the rows in ascending time order
      rows.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());

      // all interval starts covering the data, padded at both ends so that
      // leading and trailing zeros have a slot to land on
      const earliest = rows[0].key[1];
      const latest = rows[rows.length - 1].key[1];
      const slots = interval.range(
        interval.offset(earliest, -1),
        interval.offset(latest, 2)
      );

      // one list of {key: time, value} per category, with each time copied
      const byCategory = new Map();
      for (const {key: [cat, time], value} of rows) {
        if (!byCategory.has(cat)) {
          byCategory.set(cat, []);
        }
        byCategory.get(cat).push({key: new Date(time), value});
      }

      // flatten back to rows keyed by [category, time]
      const filled = [];
      for (const [cat, points] of byCategory) {
        for (const {key: time, value} of padEdges(points, slots)) {
          filled.push({key: [cat, time], value});
        }
      }
      return filled;
    },
  };
}

请查看代码中的注释(comments)以获得解释。它生成的数据量只与输入数据成比例:例如使用 timeMinute 时,15 个输入点只会得到 67 个点,而不是 30 多万个点。

有趣的是,我发现d3.curveCardinal 在零较少时会产生奇怪的伪影。直觉上,我认为如果跳过点,这条线会获得太多的“动力”。所以我选择了d3.curveMonotoneX。反正我觉得比较合适。

  .curve(d3.curveMonotoneX)

我还在开头和结尾填充了interval.range,这样数据的开头和结尾都为零,这更吸引人。

当您选择 d3.timeSecond 时,此示例仍然很慢(它仍然迭代 300+K 点),但它似乎在直到 timeMinute 时表现良好,这似乎捕获了此数据的分辨率。

进一步可能的改进:

  1. 添加更多前导零和尾随零,以使曲线一致/对称
  2. 停止使用 interval.range,这样就不会有那么多点被计算后又丢弃;改为只使用 interval.offset 以及前一个/后一个数据点来检测上升沿和下降沿(比较棘手!)

Example fiddle

【讨论】:

    【解决方案2】:

    由于我之前的答案在使用小时间间隔时仍然太慢,所以我重写了循环的核心。

    与其遍历开始和结束之间的整个日期范围,只需查看数据并检测是否应该在最后一个数据点和这个数据点之间添加一个或两个零,它会更快、更简单。

    fill_composite_intervals 的心脏现在看起来像

      // Earliest and latest timestamps in the data, each passed through the
      // interval function.
      const [begin, end] = d3.extent(allArray, (entry) => entry.key[1]).map(interval);

      // Rebuild each category's point list, inserting zeros next to the data
      // points instead of walking every interval between begin and end.
      allMap.forEach((points, category, map) => {
        const padded = [];
        if (points.length) {
          // start two intervals before the overall beginning so the first
          // point is always preceded by a gap
          let previous = interval.offset(begin, -2);
          for (const {key: time, value} of points) {
            const current = {key: new Date(time), value};
            const steps = interval.count(previous, current.key);
            if (steps !== 0 && steps !== 1) {
              // a gap: one zero right after the previous point ...
              padded.push({key: interval.offset(previous, 1), value: 0});
              if (steps > 2) {
                // ... and, when the gap is wider than one interval, a second
                // zero right before the current point
                padded.push({key: interval.offset(current.key, -1), value: 0});
              }
            }
            padded.push(current);
            previous = current.key;
          }
          // trailing zero after the last point
          padded.push({key: interval.offset(previous, 1), value: 0});
        }
        map.set(category, padded);
      });
    

    Faster fiddle.

    更新:更平滑、对称的曲线

    第一条和最后一条曲线是畸形的,因为它们缺少样条曲线上的控制点以使边缘处的斜率为 0。

    我们可以在开头和结尾再加一个零。

    这里是多时间线图的快速流畅的假组。

    /**
     * Wraps a group keyed by [category, time] in a "fake group" that adds zeros
     * around each category's data points: two before the first point, two
     * after the last one, and one or two inside every gap between points.
     * Only the data points themselves are visited; the intervals between them
     * are never enumerated.
     *
     * @param {{all: function(): Array<{key: [*, Date], value: number}>}} group
     *   source group; only all() is used and its array is copied, not modified
     * @param {{offset: function(Date, number): Date, count: function(Date, Date): number}} interval
     *   d3-style time interval
     * @returns {{all: function(): Array<{key: [*, Date], value: number}>}}
     *   same row shape as the input, with the extra zero rows included
     */
    function fill_composite_intervals(group, interval) {
      return {
        all() {
          const rows = [...group.all()];
          if (rows.length === 0) {
            return [];
          }
          // ascending time order, so every category's points are ordered too
          rows.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());

          // one list of {key: time, value} per category, with each time copied
          const byCategory = new Map();
          for (const {key: [cat, time], value} of rows) {
            if (!byCategory.has(cat)) {
              byCategory.set(cat, []);
            }
            byCategory.get(cat).push({key: new Date(time), value});
          }

          const filled = [];
          for (const [cat, points] of byCategory) {
            if (!points.length) {
              continue;
            }
            const emit = (time, value) => filled.push({key: [cat, time], value});

            // pretend the previous point lies three intervals before the
            // first one, which yields two leading zeros
            let previous = interval.offset(points[0].key, -3);
            for (const point of points) {
              const steps = interval.count(previous, point.key);
              if (steps !== 0 && steps !== 1) {
                // a gap: zero right after the previous point ...
                emit(interval.offset(previous, 1), 0);
                if (steps > 2) {
                  // ... and another right before this point if the gap is
                  // wider than one interval
                  emit(interval.offset(point.key, -1), 0);
                }
              }
              emit(point.key, point.value);
              previous = point.key;
            }
            // two trailing zeros after the last point
            emit(interval.offset(previous, 1), 0);
            emit(interval.offset(previous, 2), 0);
          }
          return filled;
        },
      };
    }
    

    Smoother fiddle.

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2017-05-12
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2022-11-30
      • 1970-01-01
      相关资源
      最近更新 更多