【问题标题】:MongoDB group by multiple fields in a way to not affect each other result(MongoDB 按多个字段分组,且各字段的结果互不影响)
【发布时间】:2018-07-17 07:57:57
【问题描述】:

我有以下查询,我想要的是拥有一组自定义组字段名称和字段值。

// Three-stage aggregation over 'mycollection':
//   1. $match - keep published, non-drafted, non-deleted documents whose
//               publishDate <= 2018-02-06 <= expireDate.
//   2. $group - one bucket per distinct COMBINATION of the nine fields in
//               `_id`, with a document count and three "published after X"
//               counters per bucket.
//   3. $group - fold every bucket into one summary document.
db.getCollection('mycollection').aggregate([
  {
    "$match": {
      // publishDate / expireDate are stored as ISODate values, so the bounds
      // must be dates too: a string bound does not match a date field.
      "expireDate": { "$gte": ISODate("2018-02-06T00:00:00.000Z") },
      "publishDate": { "$lte": ISODate("2018-02-06T00:00:00.000Z") },
      "isPublished": true,
      "isDrafted": false,
      "deletedAt": { "$eq": null },
      "deleted": false
    }
  },
  {
    "$group": {
      // Missing fields fall back to "Unknown". Because the key is the
      // combination of all nine fields, `count` below is the number of
      // documents sharing the whole combination, not a single field value.
      "_id": {
        "position": { "$ifNull": ["$position", "Unknown"] },
        "workType": { "$ifNull": ["$workType", "Unknown"] },
        "functionalArea": { "$ifNull": ["$functionalArea", "Unknown"] },
        "minimumEducation": { "$ifNull": ["$minimumEducation", "Unknown"] },
        "gender": { "$ifNull": ["$gender", "Unknown"] },
        "contractType": { "$ifNull": ["$contractType", "Unknown"] },
        "locations": { "$ifNull": ["$locations", "Unknown"] },
        "requiredLanguages": { "$ifNull": ["$requiredLanguages", "Unknown"] },
        "company": { "$ifNull": ["$company.name", "Unknown"] }
      },
      "count": { "$sum": 1 },
      // Each counter adds 1 for every document published after the cut-off.
      // The cut-off has to be a date: in the BSON comparison order a Date
      // sorts after any String, so comparing against a string literal would
      // make $gt true for every document.
      "twentyFourHourAgo": {
        "$sum": {
          "$cond": [
            { "$gt": ["$publishDate", ISODate("2018-02-04T08:48:16.892Z")] },
            1,
            0
          ]
        }
      },
      "fortyEightHourAgo": {
        "$sum": {
          "$cond": [
            { "$gt": ["$publishDate", ISODate("2018-02-01T08:48:16.892Z")] },
            1,
            0
          ]
        }
      },
      "thirtyDaysAgo": {
        "$sum": {
          "$cond": [
            { "$gt": ["$publishDate", ISODate("2017-12-31T08:48:16.892Z")] },
            1,
            0
          ]
        }
      }
    }
  },
  {
    "$group": {
      "_id": null,
      // Field paths need the leading "$". Without it $sum receives a plain
      // string literal, which is non-numeric, and the total is always 0.
      "twentyFourHourAgo": { "$sum": "$twentyFourHourAgo" },
      "fortyEightHourAgo": { "$sum": "$fortyEightHourAgo" },
      "thirtyDaysAgo": { "$sum": "$thirtyDaysAgo" },
      // $addToSet keeps only distinct { Name, Count } pairs.
      "position": {
        "$addToSet": { "Name": "$_id.position", "Count": "$count" }
      },
      "workType": {
        "$addToSet": { "Name": "$_id.workType", "Count": "$count" }
      },
      "functionalArea": {
        "$addToSet": { "Name": "$_id.functionalArea", "Count": "$count" }
      },
      "minimumEducation": {
        "$addToSet": { "Name": "$_id.minimumEducation", "Count": "$count" }
      },
      "gender": {
        "$addToSet": { "Name": "$_id.gender", "Count": "$count" }
      },
      "contractType": {
        "$addToSet": { "Name": "$_id.contractType", "Count": "$count" }
      },
      "locations": {
        "$addToSet": { "Name": "$_id.locations", "Count": "$count" }
      },
      "requiredLanguages": {
        "$addToSet": { "Name": "$_id.requiredLanguages", "Count": "$count" }
      },
      "company": {
        "$addToSet": { "Name": "$_id.company", "Count": "$count" }
      }
    }
  }
])

我的集合中的文档结构如下:

/* 1 */
{
    "_id" : ObjectId("59e4540bf14f1607b90ffb81"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2018-01-02T00:00:00.000Z"),
    "expireDate" : ISODate("2018-05-29T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "companyFiles",
            "name" : "abbbff58cd3fda2c59ab2ee620ea5aa0",
            "mime" : ".png",
            "size" : 5806
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "canApplyOnline" : true,
    "skills" : [ 
        "Skill1", 
        "Skill2", 
        "Skill3", 
        "Skill4"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Key1", 
        "Key2"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "requiresTravel" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Full Time",
    "contractType" : "Permanent",
}

/* 2 */
{
    "_id" : ObjectId("59f9402e05d04ebe5653d98f"),
    "vacancyNumber" : "1",
    "position" : "Software Engineer",
    "publishDate" : ISODate("2018-01-03T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-10T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },    
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Afghanistan", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Another Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Keyword", 
        "Key1"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan", 
        "iranian"
    ],
    "workType" : "Full Time",
    "contractType" : "Short-Term",
}

/* 3 */
{
    "_id" : ObjectId("5a03235234f7504f13970abd"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2017-10-10T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-25T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "IT Software",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "My First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Test Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Test Key", 
        "Keyword"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "female",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Part Time",
    "contractType" : "Permanent",
}

现在我想既按自定义表达式(twentyFourHourAgo、fortyEightHourAgo、thirtyDaysAgo)进行统计,又按各字段(functionalArea、position、locations、keywords、workType)的取值分别分组计数。

我当前的查询结果是

{
    "_id" : null,
    "twentyFourHourAgo" : 0,
    "fortyEightHourAgo" : 0.0,
    "thirtyDaysAgo" : 2.0,
    "position" : [ 
        {
            "Name" : "Software Engineer",
            "Count" : 1.0
        }, 
        {
            "Name" : "Software Tester",
            "Count" : 1.0
        }
    ],
    "workType" : [ 
        {
            "Name" : "Full Time",
            "Count" : 1.0
        }, 
        {
            "Name" : "Part Time",
            "Count" : 1.0
        }
    ],
    "functionalArea" : [ 
        {
            "Name" : "Education",
            "Count" : 1.0
        }, 
        {
            "Name" : "IT Software",
            "Count" : 1.0
        }
    ],
    "minimumEducation" : [ 
        {
            "Name" : "Doctorate",
            "Count" : 1.0
        }
    ],
    "gender" : [ 
        {
            "Name" : "male",
            "Count" : 1.0
        }, 
        {
            "Name" : "female",
            "Count" : 1.0
        }
    ],
    "contractType" : [ 
        {
            "Name" : "Short-Term",
            "Count" : 1.0
        }, 
        {
            "Name" : "Permanent",
            "Count" : 1.0
        }
    ],
    "locations" : [ 
        {
            "Name" : [ 
                "Afghanistan", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }, 
        {
            "Name" : [ 
                "Germany", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }
    ],
    "requiredLanguages" : [ 
        {
            "Name" : [ 
                "Arabic", 
                "English", 
                "Russian", 
                "Dari", 
                "French"
            ],
            "Count" : 1.0
        }
    ],
    "company" : [ 
        {
            "Name" : "First Company",
            "Count" : 1.0
        }, 
        {
            "Name" : "My First Company",
            "Count" : 1.0
        }
    ]
}

如您所见,我有三个具有以下属性的文档:

  1. 两个文档具有相同的 position 值 Software Tester,但查询结果中 Software Tester 的计数却是 1(也就是说,当多个文档在某个字段上取值相同时,该值的计数结果是错误的)。其他字段(contractType、workType 等)也存在同样的问题。
  2. 对于 locations 等数组类型的字段:我的第一个文档的 locations 数组为 Germany, Italy, Iran,第二个文档为 Afghanistan, Italy, Iran,第三个文档为 Germany, Italy, Iran。但是查询结果是这样的:

    "locations" : [ 
      {
        "Name" : [ 
          "Afghanistan", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }, 
      {
        "Name" : [ 
          "Germany", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }
     ],
    

期望的结果应该是:Germany => 2,Italy => 3,Iran => 3,Afghanistan => 1。其他数组类型字段也存在同样的问题。

【问题讨论】:

  • 您需要在$group 之前的每个数组元素上使用$unwind。这将使您能够计算每个数组中的所有元素
  • @N3i1 我已经尝试过了,但使用 $unwind 之后会影响其他字段,导致其他字段返回错误的结果;我已经在 locations 字段上试过了。
  • 我在您上面的 OP 中看不到正在使用的 $unwind。请看我下面的答案

标签: mongodb mongoose aggregation-framework


【解决方案1】:

抱歉,我之前误解了您的问题。为了能够对 locations 数组使用 $unwind,同时又不影响您的 twentyFourHourAgo 等字段,您可以考虑使用 $first。

如果您希望对单个元素进行计数/求和,则需要 $unwind 任何数组。

$first的使用示例。

// Example pipeline showing $first after $unwind:
//   1. $unwind - emit one copy of each document per element of `locations`.
//   2. $group  - regroup the copies by the original document _id and keep the
//                flag from the first copy only ($first), so each source
//                document contributes a single value per counter.
//   3. $group  - add the per-document flags up into one summary document.
db.getCollection('foo').aggregate([
{ $unwind : "$locations" },
{ "$group" : { "_id" : "$_id", 
        "twentyFourHourAgo":{ $first : {
            "$sum" : { "$cond":[
                {"$gt":["$publishDate", ISODate("2016-10-10T00:00:00.000Z")]},1,0 ] } } },
        // NOTE(review): the next two comparisons use string literals while the
        // one above uses ISODate(...). If publishDate is stored as a BSON date,
        // a Date sorts after any String in the BSON comparison order, so these
        // $gt tests would be true for every document - confirm whether
        // ISODate(...) was intended here.
        "fortyEightHourAgo" : { $first : {
            "$sum" : { "$cond" : [ 
                { "$gt" : [ "$publishDate","2018-01-02T00:00:00.000Z"]},1,0 ] } } },
        "thirtyDaysAgo" : { $first : { 
            "$sum" : { "$cond" : [
                { "$gt" : [ "$publishDate","2017-12-31T08:48:16.892Z"]},1,0 ] } } },
        } },
// Single summary document: totals of the per-document flags computed above.
{ "$group" : { "_id" : null,
     "twentyFourHourAgo" : { "$sum" : "$twentyFourHourAgo" },
     "fortyEightHourAgo" : { "$sum" : "$fortyEightHourAgo" },
     "thirtyDaysAgo" : { "$sum" : "$thirtyDaysAgo" },
 }}
])

输出:

"_id" : null,
"twentyFourHourAgo" : 0,
"fortyEightHourAgo" : 3.0,
"thirtyDaysAgo" : 3.0,

更多信息请参阅 $first 的文档,我认为它可能对您有用。我在管道开头加了 $unwind,以便说明它可以解决您问题中的计数问题。

【讨论】:

  • 如您所见,此更改会影响其他字段:我只有三个文档,但结果却类似于 "twentyFourHourAgo" : 0, "fortyEightHourAgo" : 9.0, "thirtyDaysAgo" : 9.0,
  • 对不起,我错过了。我试图强调 $unwind 的必要性,以便解决您的计数(count)问题!您是否尝试过用 $project 来投影 twentyFourHourAgo、fortyEightHourAgo、thirtyDaysAgo 的值,以免干扰您的分组?
  • 是的,我试过了,但我无法在群组中使用它。
猜你喜欢
  • 2023-03-18
  • 1970-01-01
  • 2014-05-20
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2018-09-19
相关资源
最近更新 更多