有时我发现,即使查询中加入了运算符 $not,索引也会被自动用于查询。这让我想起了这个问题,
它也曾让我困惑了很久。我尝试了新的思路,并发现了一些不同的东西。我想我终于找到了答案。如果大家发现任何不同之处,欢迎在这里发表评论。
在 mongo shell V2.6.4 上运行
初始化数据如下:
> db.a.drop();
false
> db.a.insert({_id:1, a:[1,2,3], b:[{x:1, y:2}, {x:2, y:3}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.insert({_id:2, a:[4,2,3], b:[{x:1, y:2}, {x:4, y:4}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.ensureIndex({a:1}, {name:"a"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
> db.a.ensureIndex({"b.x":1}, {name:"bx"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 2,
"numIndexesAfter" : 3,
"ok" : 1
}
> db.a.ensureIndex({c:1}, {name:"c"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 3,
"numIndexesAfter" : 4,
"ok" : 1
}
> db.a.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"a" : 1
},
"name" : "a",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"b.x" : 1
},
"name" : "bx",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"c" : 1
},
"name" : "c",
"ns" : "test.a"
}
]
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
此块只是简单地证明即使$not 加入查询操作,索引也会自动正确使用。
> db.a.find({c:{$not:{$gte:1}}}).explain();
{
"cursor" : "BtreeCursor c",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"c" : [
[
{
"$minElement" : 1
},
1
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
这是原问题提到的风格。索引已自动使用。
> db.a.find({b:{$elemMatch:{x:{$gte:1}}}}).explain();
{
"cursor" : "BtreeCursor bx", // attention on this line
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 9,
"indexBounds" : {
"b.x" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
在 $elemMatch 之前使用运算符 $not 时,索引不起作用。这是这个问题的核心。
> db.a.find({b:{$not:{$elemMatch:{x:{$gte:1}}}}}).explain();
{
"cursor" : "BasicCursor", // attention on this line
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"server" : "Duke-PC:27017",
"filterSet" : false
}
这个块:找到一些方法来解释数组字段索引的机制。
共有两个文档,但 nscanned 为 6。这告诉我们索引是如何针对数组类型构建的:索引节点建立在数组的每个元素上,而不是数组本身。我想象字段 a 上的索引结构是这样的:
BTree: Node(value:1, entry:[entry({_id:1})]), Node(value:2, entry:[entry({_id:1}), entry({_id:2})]), ...
当然,这只是我的想象以作解释。 :)
> db.a.find({a:{$gte:1}}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 6, // attention on this line
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 6,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
使用运算符$not 时,自动采用相关索引。而“indexBounds”字段告诉我们$not如何处理查询。
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 1, // attention on this field
"nscanned" : 2, // attention on this field
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : { // attention on this field
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
插入具有相同字段名称a 但不是数组的新文档。
> db.a.insert({a:1});
WriteResult({ "nInserted" : 1 })
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
{ "_id" : ObjectId("541e4fcbb65042180c128280"), "a" : 1 }
请阅读此块与上面的内容进行比较。
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true, // This tells engine there are repeated array elements on index.
"n" : 1,
"nscannedObjects" : 2, // The third document should only access the index to fetch data
// since it has enough information.
// But here the engine still reads from the collection. My understanding is that the engine
// can not distinguish whether this index field is an array element or not,
// so it has to access the collection to find more information.
"nscanned" : 3,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 25,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
结论:
- $elemMatch 很特别:
- $elemMatch 明确表明字段“b”是一个数组。
- 并且根据此运算符的查询定义,只要找到任何一个与查询匹配的元素,就可以立即返回 true;但只有在扫描完数组的所有元素且没有找到任何满足条件的元素之后,才能返回 false。
- 但数组上的索引结构(参见我上面设想的结构)无法支持这种操作,因为仅凭索引,引擎无法确定索引上的哪些节点恰好来自同一个数组。这是解释这个问题最重要的一点。
- 其他运算符(比如 $gte、$lt 等)在各自的查询定义中没有这个限制,因为只需一个元素匹配即可判定文档是否匹配,这可以直接通过索引来满足。
最后,有一种方法可以解决原始问题,但并不完美,因为必须提供整个元素。
做法是在数组字段本身上建立索引,而不是在数组元素的字段上建立索引。
> db.a.ensureIndex({b:1});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 4,
"numIndexesAfter" : 5,
"ok" : 1
}
> db.a.find({b:{$ne:{x:2, y:3}}}).explain();
{
"cursor" : "BtreeCursor b_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 33,
"indexBounds" : {
"b" : [
[
{
"$minElement" : 1
},
{
"x" : 2,
"y" : 3
}
],
[
{
"x" : 2,
"y" : 3
},
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}