【发布时间】:2018-06-29 18:42:03
【问题描述】:
我想最终按季度汇总我的数据。我可以用下面的代码来做,但由于下面代码中的 INV_ITEM_ID 不是数字类型,它不允许我将它添加到列表中。
temptable[ ,lapply(.SD, sum), by = quarter(INVOICE_DT),
.SDcols = c("INV_ITEM_ID","Ext Sale", "Ext Total Cost", "CE100", "CE110","CE120","QTY_SOLD","PACKSLIP_WHSL")]
运行脚本时出现以下错误
Error in gsum(INV_ITEM_ID) :
Type 'character' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)
下面我提供了我的数据子集(前 30 行)。
library(data.table)
temptable = setDT(structure(list(INVOICE_DT = c("2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29",
"2016-01-29", "2016-01-29"), BRANCH_CODE = c("AK001", "AK001",
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001",
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001",
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001",
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001"
), INVOICE_NO = c("AK0015522", "AK0015529", "AK0015515", "AK0015529",
"AK0015514", "AK0015513", "AK0015519", "AK0015519", "AK0015520",
"AK0015520", "AK0015520", "AK0015522", "AK0015520", "AK0015520",
"AK0015520", "AK0015518", "AK0015525", "AK0015520", "AK0015519",
"AK0015520", "AK0015525", "AK0015518", "AK0015529", "AK0015519",
"AK0015525", "AK0015519", "AK0015519", "AK0015522", "AK0015522",
"AK0015529"), INV_SEQ_NO = c(6L, 6L, 3L, 3L, 3L, 3L, 148L, 149L,
91L, 45L, 162L, 34L, 125L, 39L, 159L, 56L, 64L, 127L, 160L, 93L,
45L, 34L, 5L, 138L, 46L, 38L, 161L, 35L, 28L, 4L), INV_ITEM_ID = c("11239190",
"11239190", "55692", "400050-131280", "0700535", "0800758", "69970",
"0445050", "0819092", "0115160", "0115160", "0703316", "55692",
"0115157", "0115157", "0115157", "440704-131280", "55618", "0703318",
"0205428", "0702119", "0115157", "465278", "465278", "0702121",
"15166", "0703315", "0703315", "0715718", "0243945"), ITEM_DESCR = c("Au-Ve-Co# 20898",
"Au-Ve-Co# 20898", "MM HSS T TAP 16X1.5", "3/16\"\"\"\"H05703-100R Hs",
"18/2 BLK SOOW", "3x1x1/4 180G FW", "BF SPEC SER HV1-6C", "1/4MP External Seat",
"1-1/2xNHConditonDisc", "HCS 7/16-14x1 3/4 YZ8", "HCS 7/16-14x1 3/4 YZ8",
"25AMP MINI AUTO FUSE", "MM HSS T TAP 16X1.5", "HCS 7/16-14x1.25 YZ8",
"HCS 7/16-14x1.25 YZ8", "HCS 7/16-14x1.25 YZ8", "BPOLY MELBW 1/4TX1/8",
"MM HSS P TAP 5 X 0.8", "MAXI-BLADE 20 AMP", "1-1/2\"\"\"\" T3 VF SURF",
"16ga Red Wire/PPF", "HCS 7/16-14x1.25 YZ8", "80I 45 ELBOW 1/4 BLK",
"80I 45 ELBOW 1/4 BLK", "16ga Blk Wire/PPF", "HCS 7/16-14x3.25 YZ8",
"20AMP MINI AUTO FUSE", "20AMP MINI AUTO FUSE", "3/0-3/8\"\"\"\"HvyWalCprLug",
"97053 Pink Marker"), STD_ITEM = c("Y", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), PRIVATE_LABEL = c("N",
"N", "Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "Y", "N",
"N", "N", "N", "Y", "N", "N", "N", "N", "Y", "Y", "N", "N", "N",
"N", "N", "N"), CATEGORY_PATH1 = c("08.5-Fleet & Automotive",
"08.5-Fleet & Automotive", "07-Cutting Tools & Metalworking",
"08-Hydraulics & Pneumatics", "09-Electrical", "05-Abrasives",
"08-Hydraulics & Pneumatics", "07.3-Plumbing", "05-Abrasives",
"01-Fasteners", "01-Fasteners", "08.5-Fleet & Automotive", "07-Cutting Tools & Metalworking",
"01-Fasteners", "01-Fasteners", "01-Fasteners", "08-Hydraulics & Pneumatics",
"07-Cutting Tools & Metalworking", "08.5-Fleet & Automotive",
"05-Abrasives", "08.5-Fleet & Automotive", "01-Fasteners", "07.3-Plumbing",
"07.3-Plumbing", "08.5-Fleet & Automotive", "01-Fasteners", "08.5-Fleet & Automotive",
"08.5-Fleet & Automotive", "08.5-Fleet & Automotive", "11-Chemicals & Paints"
), CATEGORY_PATH2 = c("03-Batteries & Accessories", "03-Batteries & Accessories",
"04-Threading and Tapping", "02-Hose and Hose Reels", "05-Electrical Wire and Accessories",
"03-Coated and Non-Woven Abrasives", "07-Valves & Accessories",
"04-Valves & Valve Accessories", "03-Coated and Non-Woven Abrasives",
"01-Bolts", "01-Bolts", "02-Fleet Electrical Products", "04-Threading and Tapping",
"01-Bolts", "01-Bolts", "01-Bolts", "01-Fittings", "04-Threading and Tapping",
"02-Fleet Electrical Products", "03-Coated and Non-Woven Abrasives",
"02-Fleet Electrical Products", "01-Bolts", "01-Pipe Fittings",
"01-Pipe Fittings", "02-Fleet Electrical Products", "01-Bolts",
"02-Fleet Electrical Products", "02-Fleet Electrical Products",
"02-Fleet Electrical Products", "11-Paints, Marking, and Accessories"
), CUST_ID = c("AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001",
"AK0010001"), CUSTOMER_TIER = c("Tier 3", "Tier 3", "Tier 3",
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3",
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3",
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3",
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3"),
IS_VENDING = c("No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No"), SALE_PRICE = c(10.53, 10.53, 23.0175,
2.0251, 0.4755, 6.4725, 15.8235, 1.468, 1.0275, 0.3425, 0.3425,
0.3726, 23.0175, 0.2513, 0.2513, 0.2513, 2.7412, 5.7675,
2.226, 1.5579, 0.1418, 0.2513, 12.0835, 12.0835, 0.1297,
0.625, 0.3726, 0.3726, 3.514, 4.1112), TOTAL_COST = c(6.877449,
6.877449, 9.399562, 0.924873, 0.245128, 2.953287, 6.227787,
0.703494, 0.52035, 0.083454, 0.083454, 0.134285, 9.399562,
0.062686, 0.062686, 0.062686, 1.3493, 2.386274, 0.77503,
0.524996, 0.051595, 0.062686, 4.47782, 4.47782, 0.051628,
0.164998, 0.125893, 0.125893, 2.183983, 1.469989), POS_COST = c(6.877449,
6.877449, 9.399562, 0.924873, 0.245128, 2.953287, 6.227787,
0.703494, 0.52035, 0.083454, 0.083454, 0.134285, 9.399562,
0.062686, 0.062686, 0.062686, 1.3493, 2.386274, 0.77503,
0.524996, 0.051595, 0.062686, 4.47782, 4.47782, 0.051628,
0.164998, 0.125893, 0.125893, 2.183983, 1.469989), CE100 = c(6.55,
6.55, 9.25, 0.95, 0.23785, 2.81, 5.93127, 1.17, 0.508, 0.074946,
0.074946, 0.12, 9.25, 0.058626, 0.058626, 0.058626, 1.31,
2.35, 0.61, 0.5, 0.05172, 0.058626, 4.2844, 4.2844, 0.05172,
0.12255, 0.12, 0.12, 2.08, 1.43), CE110 = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), CE120 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), CE200 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CORP_PRICE = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), QTY_SOLD = c(15L,
10L, 6L, 50L, 250L, 10L, 10L, 20L, 50L, 25L, 25L, 50L, 6L,
25L, 25L, 25L, 10L, 6L, 10L, 30L, 100L, 25L, 15L, 10L, 100L,
20L, 50L, 50L, 4L, 36L), PACKSLIP_WHSL = c(NA, NA, 30.69,
2.63, 0.7315, 8.63, 20.55, 1.96, 1.37, 0.685, 0.685, 0.5323,
30.69, 0.5025, 0.5025, 0.5025, 3.56, 7.69, 3.18, 1.82, 0.2145,
0.5025, 18.59, 18.59, 0.1961, 1.25, 0.5323, 0.5323, 5.02,
5.71), PRICING_GROUP = c("PGB", "PGB", "PGB", "PGB", "PGB",
"PGB", "PGG", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB",
"PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB",
"PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB"), PGG_MIN_PRICE = c(257.9043375,
171.936225, 184.14, 131.5, 175.56, 86.3, 154.125, 39.2, 68.5,
15.07, 15.07, 26.615, 184.14, 11.055, 11.055, 11.055, 35.6,
46.14, 31.8, 54.6, 21.45, 11.055, 253.7535, 185.9, 19.61,
22, 26.615, 26.615, 20.08, 205.56), PGY_MIN_PRICE = c(206.32347,
137.54898, 171.2502, 128.87, 170.07375, 80.259, 147.96, 36.456,
63.705, 13.3575, 13.3575, 24.75195, 171.2502, 9.79875, 9.79875,
9.79875, 33.108, 45.2172, 29.574, 50.778, 19.9485, 9.79875,
245.388, 163.592, 18.2373, 19.5, 24.75195, 24.75195, 18.6744,
191.1708), PGR_MIN_PRICE = c(171.936225, 114.62415, 162.0432,
124.925, 166.41625, 75.944, 141.795, 35.672, 60.28, 9.9325,
9.9325, 24.21965, 162.0432, 7.28625, 7.28625, 7.28625, 32.396,
43.833, 28.938, 48.048, 19.5195, 7.28625, 239.811, 159.874,
17.8451, 14.5, 24.21965, 24.21965, 18.2728, 180.8928), `Ext Sale` = c(157.95,
105.3, 138.105, 101.255, 118.875, 64.725, 158.235, 29.36,
51.375, 8.5625, 8.5625, 18.63, 138.105, 6.2825, 6.2825, 6.2825,
27.412, 34.605, 22.26, 46.737, 14.18, 6.2825, 181.2525, 120.835,
12.97, 12.5, 18.63, 18.63, 14.056, 148.0032), `Ext Total Cost` = c(103.161735,
68.77449, 56.397372, 46.24365, 61.282, 29.53287, 62.27787,
14.06988, 26.0175, 2.08635, 2.08635, 6.71425, 56.397372,
1.56715, 1.56715, 1.56715, 13.493, 14.317644, 7.7503, 15.74988,
5.1595, 1.56715, 67.1673, 44.7782, 5.1628, 3.29996, 6.29465,
6.29465, 8.735932, 52.919604), cust_part = c("AK001000111239190",
"AK001000111239190", "AK001000155692", "AK0010001400050-131280",
"AK00100010700535", "AK00100010800758", "AK001000169970",
"AK00100010445050", "AK00100010819092", "AK00100010115160",
"AK00100010115160", "AK00100010703316", "AK001000155692",
"AK00100010115157", "AK00100010115157", "AK00100010115157",
"AK0010001440704-131280", "AK001000155618", "AK00100010703318",
"AK00100010205428", "AK00100010702119", "AK00100010115157",
"AK0010001465278", "AK0010001465278", "AK00100010702121",
"AK001000115166", "AK00100010703315", "AK00100010703315",
"AK00100010715718", "AK00100010243945")), .Names = c("INVOICE_DT",
"BRANCH_CODE", "INVOICE_NO", "INV_SEQ_NO", "INV_ITEM_ID", "ITEM_DESCR",
"STD_ITEM", "PRIVATE_LABEL", "CATEGORY_PATH1", "CATEGORY_PATH2",
"CUST_ID", "CUSTOMER_TIER", "IS_VENDING", "SALE_PRICE", "TOTAL_COST",
"POS_COST", "CE100", "CE110", "CE120", "CE200", "CORP_PRICE",
"QTY_SOLD", "PACKSLIP_WHSL", "PRICING_GROUP", "PGG_MIN_PRICE",
"PGY_MIN_PRICE", "PGR_MIN_PRICE", "Ext Sale", "Ext Total Cost",
"cust_part"), row.names = c(NA,
30L), class = c("data.table", "data.frame")))
我也尝试过通过 INV_ITEM_ID 运行脚本,但脚本不会只返回不同的值。
我无法通过 INV_ITEM_ID 进行总结。
【问题讨论】:
-
下一次,请写一个最小的示例——我们可能不需要查看您的真实脚本或数据,如果您制作一个新的可重现示例,我们可能会更好地了解问题。无论如何,错误说它不能求和 INV_ITEM_ID,这是有道理的。要通过多个变量进行总结,您可以像
temptable[ , lapply(.SD, sum), by = .(INV_ITEM_ID, quarter(INVOICE_DT)), .SDcols = c("Ext Sale", "Ext Total Cost", "CE100", "CE110","CE120","QTY_SOLD","PACKSLIP_WHSL")]那样做by=.(var1,var2)不确定是否能回答它。 -
Frank,这实际上解决了我的第一个问题。但我遇到的最大问题是,当它返回 INV_ITEM_ID 值时,它实际上并没有被汇总。它给了我 5,135,153 行和 933,049 个不同的值。我要做的是将其汇总到 933,049 行,然后按季度汇总
-
好的,如果你想在一行上每个不同的值和在列上的 Quarters + vars (?),你可以把最后一个命令的结果传递给
dcast(res, INV_ITEM_ID ~ quarter, value.var=c("Ext Sale", "Ext Total Cost", "CE100", "CE110","CE120","QTY_SOLD","PACKSLIP_WHSL"))虽然我猜这可能非常难以阅读。更短的版本:dcast(temptable, INV_ITEM_ID ~ quarter(INVOICE_DT), fun = sum, value.var = val_cols) -
弗兰克,我想我不确定我是否完全理解这一点。我想知道为什么我会通过 INV_ITEM_ID 总结它没有将销售信息组合到每个分组中。我不知道为什么它显示多行而不是将它们加在一起
-
好的,您可能需要澄清您期望的输出,对应于提供的 30 行示例。一些指导:stackoverflow.com/questions/5963269/…
标签: r data.table lapply lubridate