你需要先创建一个由偏移量/长度对组成的数组。例如,对于字符串 "foo aaab bar aaa"(关键字为 "aaa" 和 "aab"),你会得到这样的数组:
[
{offset: 4, length: 3, keyword: "aaa"},
{offset: 5, length: 3, keyword: "aab"},
{offset: 13, length: 3, keyword: "aaa"}
]
然后你需要遍历这个数组,把相互重叠的部分合并(展平),结果是:
[
{offset: 4, length: 4, keywords: ["aaa", "aab"]},
{offset: 13, length: 3, keywords: ["aaa"]}
]
有了这些信息,你就可以根据各个偏移量和长度,在正确的位置插入 span 标签。
展平(合并)重叠的偏移量是最困难的部分。下面是我的尝试,但我还没有对它进行完整的测试。你可以在 jsfiddle 上试一试这段代码。
/**
 * Merge overlapping keyword matches into flat, non-overlapping ranges.
 *
 * Each input item describes one keyword hit as `{offset, length, keyword}`.
 * Hits whose ranges overlap or touch (one ends exactly where the next
 * starts) are combined into a single range that lists every contributing
 * keyword. Merging is transitive: if A overlaps B and B overlaps C, all
 * three end up in the same output range even when A and C do not overlap.
 *
 * The input array and its items are not modified.
 *
 * @param {Array<{offset: number, length: number, keyword: string}>} xs
 *     Keyword hits, in any order.
 * @returns {Array<{offset: number, length: number, keywords: string[]}>}
 *     Merged ranges sorted by ascending offset. Within a range, keywords
 *     are listed in ascending offset order of their hits (ties keep the
 *     input order); duplicates are kept.
 */
function flattenOffsets(xs){
    // Sort a decorated copy so the caller's array is left untouched. The
    // original index is the tie-breaker, which keeps the result
    // deterministic even on engines whose sort is not stable.
    var sorted = xs.map(function(x, i){
        return {item: x, index: i};
    }).sort(function(a, b){
        return (a.item.offset - b.item.offset) || (a.index - b.index);
    });

    var out = [];
    var current = null;   // range currently being extended
    var currentEnd = 0;   // exclusive end of `current`

    sorted.forEach(function(entry){
        var x = entry.item;
        var xEnd = x.offset + x.length;

        // Because hits are visited in ascending offset order, a hit belongs
        // to the current range exactly when it starts at or before its end.
        if (current !== null && x.offset <= currentEnd) {
            // Only ever grow the range; a short hit must not shrink it.
            currentEnd = Math.max(currentEnd, xEnd);
            current.length = currentEnd - current.offset;
            current.keywords.push(x.keyword);
            return;
        }

        current = {offset: x.offset, length: x.length, keywords: [x.keyword]};
        currentEnd = xEnd;
        out.push(current);
    });

    return out;
}