【问题标题】:Aggregate averages from Embedded Documents嵌入文档的聚合平均值
【发布时间】:2015-07-05 17:52:17
【问题描述】:

我的文档结构如下所示:

{
    "_id" : ObjectId("52517d9e6e0af435ddd48219"),
    "date" : ISODate("2013-10-06T18:11:26.329Z"),
    "engines" : {
        "ahnlab" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "ahnlab",
            "node_name" : "",
            "status" : 1,
            "task_id" : "7fac4f67-2bde-49de-980a-4f7fa1d46db7",
            "threat" : "EICAR_Test_File"
        },
        "avast" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avast",
            "node_name" : "",
            "status" : 4,
            "task_id" : "fdce50e9-9bf3-4cc9-91a4-b674a108d478",
            "threat" : ""
        },
        "avg" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avg",
            "node_name" : "",
            "status" : 1,
            "task_id" : "4bdb4a37-80ab-4631-8587-edcbde7e2592",
            "threat" : "EICAR_Test"
        },
        "avira" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avira",
            "node_name" : "",
            "status" : 4,
            "task_id" : "43b43bab-ff67-440c-9919-f6241ccaf539",
            "threat" : ""
        },
        "bitdefender" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "bitdefender",
            "node_name" : "",
            "status" : 4,
            "task_id" : "ba7efe74-ef93-465f-b6c3-6c4fc746934d",
            "threat" : ""
        },
        "comodo" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "comodo",
            "node_name" : "",
            "status" : 4,
            "task_id" : "831d2dc8-704b-4eb7-9a75-9a0364a8ab09",
            "threat" : ""
        },
        "drweb" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "drweb",
            "node_name" : "",
            "status" : 4,
            "task_id" : "ebb758c3-9146-4a99-b36f-0fb6ee024a33",
            "threat" : ""
        },
        "f-prot" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "f-prot",
            "node_name" : "",
            "status" : 4,
            "task_id" : "13590a25-ac0d-4b1b-b93e-bc715009432a",
            "threat" : ""
        },
        "forticlient" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "forticlient",
            "node_name" : "",
            "status" : 4,
            "task_id" : "",
            "threat" : ""
        },
        "kaspersky" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "kaspersky",
            "node_name" : "",
            "status" : 4,
            "task_id" : "",
            "threat" : ""
        },
        "malwarebytes" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "malwarebytes",
            "node_name" : "",
            "status" : 4,
            "task_id" : "a97f3d9a-d6a9-44df-8355-c053e9f4980a",
            "threat" : ""
        },
        "mcafee" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "mcafee",
            "node_name" : "",
            "status" : 1,
            "task_id" : "e7a592bb-84fc-4c47-a1ed-3719874b19cc",
            "threat" : "EICAR test file"
        },
        "msessentials" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "msessentials",
            "node_name" : "",
            "status" : 4,
            "task_id" : "01c9fb71-155a-473d-b45c-91fa117ae649",
            "threat" : ""
        },
        "nod32" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "nod32",
            "node_name" : "",
            "status" : 4,
            "task_id" : "a715cc8f-0e2f-4698-b883-a35fa6add13e",
            "threat" : ""
        },
        "norman" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "norman",
            "node_name" : "",
            "status" : 4,
            "task_id" : "29da2955-0674-45d2-ac4d-c0b3ea401cba",
            "threat" : ""
        },
        "norton" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "norton",
            "node_name" : "",
            "status" : 4,
            "task_id" : "c4cd1e3e-2f0d-4bf6-84cf-ab0962f7f4ed",
            "threat" : ""
        },
        "panda" : {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "panda",
            "node_name" : "",
            "status" : 4,
            "task_id" : "b0888f7c-e4a4-4b4e-b163-21d283e166f1",
            "threat" : ""
        }
    },
    "expiration_date" : ISODate("2013-10-06T18:11:36.329Z"),
    "file_name" : "ffgtr.exe",
    "scan_status" : "DONE",
    "task_id" : "4ce4ae9e-ef0a-476a-8189-92a5bfe328bd"
}

我想对每个引擎的“日期”和“定义”字段进行聚合投影。 (“定义”是一个嵌入字段)

只有当我指定特定的引擎时我才成功:

cursor = collection.aggregate([
    { "$match": { "date": { "$gte": startdate } } },
    {'$project': { "def": "$engines.avast.definitions","date":1,"_id": 0 }}
], allowDiskUse=True)

但我想在不指定特定引擎的情况下对所有引擎运行查询,例如:

cursor = collection.aggregate([
    { "$match": { "date": { "$gte": startdate } } },
    {'$project': { "def": "$engines.$elemMatch.definitions","date":1,"_id": 0 }}
], allowDiskUse=True)

(这不起作用)

我想要找到的是每个引擎类型的“日期”和每个“引擎”定义日期之间的平均差异。

【问题讨论】:

  • 它不会那样工作。您应该将引擎定义为“数组/列表”,其中每个“定义”值都有一个“一致”路径。聚合框架,就像其他 MongoDB 查询运算符一样,不能像这样遍历对象结构。当前结构的唯一方法是使用 mapReduce。但是,您实际上并没有说明您在这一步之后要做什么。

标签: mongodb mongodb-query pymongo aggregation-framework


【解决方案1】:

这里的主要问题是数据的结构。为了使用聚合框架并愉快地使用大多数其他 MongoDB 查询操作和索引实践,那么您的数据应该采用这样的结构。

{
    "_id" : ObjectId("52517d9e6e0af435ddd48219"),
    "date" : ISODate("2013-10-06T18:11:26.329Z"),
    "engines" : [
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "ahnlab",
            "node_name" : "",
            "status" : 1,
            "task_id" : "7fac4f67-2bde-49de-980a-4f7fa1d46db7",
            "threat" : "EICAR_Test_File"
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avast",
            "node_name" : "",
            "status" : 4,
            "task_id" : "fdce50e9-9bf3-4cc9-91a4-b674a108d478",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avg",
            "node_name" : "",
            "status" : 1,
            "task_id" : "4bdb4a37-80ab-4631-8587-edcbde7e2592",
            "threat" : "EICAR_Test"
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "avira",
            "node_name" : "",
            "status" : 4,
            "task_id" : "43b43bab-ff67-440c-9919-f6241ccaf539",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "bitdefender",
            "node_name" : "",
            "status" : 4,
            "task_id" : "ba7efe74-ef93-465f-b6c3-6c4fc746934d",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "comodo",
            "node_name" : "",
            "status" : 4,
            "task_id" : "831d2dc8-704b-4eb7-9a75-9a0364a8ab09",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "drweb",
            "node_name" : "",
            "status" : 4,
            "task_id" : "ebb758c3-9146-4a99-b36f-0fb6ee024a33",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "f-prot",
            "node_name" : "",
            "status" : 4,
            "task_id" : "13590a25-ac0d-4b1b-b93e-bc715009432a",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "forticlient",
            "node_name" : "",
            "status" : 4,
            "task_id" : "",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "kaspersky",
            "node_name" : "",
            "status" : 4,
            "task_id" : "",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "malwarebytes",
            "node_name" : "",
            "status" : 4,
            "task_id" : "a97f3d9a-d6a9-44df-8355-c053e9f4980a",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "mcafee",
            "node_name" : "",
            "status" : 1,
            "task_id" : "e7a592bb-84fc-4c47-a1ed-3719874b19cc",
            "threat" : "EICAR test file"
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "msessentials",
            "node_name" : "",
            "status" : 4,
            "task_id" : "01c9fb71-155a-473d-b45c-91fa117ae649",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "nod32",
            "node_name" : "",
            "status" : 4,
            "task_id" : "a715cc8f-0e2f-4698-b883-a35fa6add13e",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "norman",
            "node_name" : "",
            "status" : 4,
            "task_id" : "29da2955-0674-45d2-ac4d-c0b3ea401cba",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "norton",
            "node_name" : "",
            "status" : 4,
            "task_id" : "c4cd1e3e-2f0d-4bf6-84cf-ab0962f7f4ed",
            "threat" : ""
        },
        {
            "definitions" : ISODate("2000-01-01T00:00:00.000Z"),
            "name" : "panda",
            "node_name" : "",
            "status" : 4,
            "task_id" : "b0888f7c-e4a4-4b4e-b163-21d283e166f1",
            "threat" : ""
        }
    ],
    "expiration_date" : ISODate("2013-10-06T18:11:36.329Z"),
    "file_name" : "ffgtr.exe",
    "scan_status" : "DONE",
    "task_id" : "4ce4ae9e-ef0a-476a-8189-92a5bfe328bd"
}

然后使用该结构就像处理$unwind 然后为您的结果处理$group 一样简单:

db.collection.aggregate([
    { "$unwind": "$engines" },
    { "$group": {
        "_id": "$engines.name",
        "avgInterval": { "$avg": {
            "$subtract": [ "$date", "$engines.definitions" ]
        }}
    }}
])

两个日期对象之间的差异以毫秒为单位。

否则,您将无法使用带有 mapReduce 的 JavaScript 遍历代码中的对象键:

db.collection.mapReduce(
    function() {
        var doc = this;
        Object.keys(doc.engines).forEach(function(key) {
            emit( key, 
                ( doc.date.valueOf() - 
                    doc.engines[key].definitions.valueOf())
            );
        })
   },
   function(key,values) { 
       return ( Array.sum(values) / values.length );
   },
   { "out": { "inline": 1 } }
);

两者都会为您提供集合或其他查询输入中每个引擎“名称”的平均值。

尝试更改结构,因为它更适合您的持续需求并提供更快的处理速度。

【讨论】:

    猜你喜欢
    • 2013-09-18
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2019-05-20
    • 1970-01-01
    • 2019-04-29
    • 2021-08-12
    • 2017-07-31
    相关资源
    最近更新 更多