【发布时间】:2019-04-30 20:04:18
【问题描述】:
下面是我的数据框 (my_df)。我想把它转换为时间序列对象,以便预测 2020 年的数值,但在转换这种数据格式时遇到了困难。
我尝试使用下面的代码将其转换为时间序列,但地理位置列得到的是 NA,而且结果也不是时间序列格式。
我的尝试
ts(my_df[,c(-2,-3)], start=c(2009), end=c(2014), frequency=1)
下面的数据框 my_df
structure(list(`Geogrphical Location` = c("United States", "Northeast",
"Midwest", "South", "West", ".Alabama", ".Alaska", ".Arizona",
".Arkansas", ".California", ".Colorado", ".Connecticut", ".Delaware",
".District of Columbia", ".Florida", ".Georgia", ".Hawaii", ".Idaho",
".Illinois", ".Indiana", ".Iowa", ".Kansas", ".Kentucky", ".Louisiana",
".Maine", ".Maryland", ".Massachusetts", ".Michigan", ".Minnesota",
".Mississippi", ".Missouri", ".Montana", ".Nebraska", ".Nevada",
".New Hampshire", ".New Jersey", ".New Mexico", ".New York",
".North Carolina", ".North Dakota", ".Ohio", ".Oklahoma", ".Oregon",
".Pennsylvania", ".Rhode Island", ".South Carolina", ".South Dakota",
".Tennessee", ".Texas", ".Utah", ".Vermont", ".Virginia", ".Washington",
".West Virginia", ".Wisconsin", ".Wyoming", "Puerto Rico"), Census = c(308745538,
55317240, 66927001, 114555744, 71945553, 4779736, 710231, 6392017,
2915918, 37253956, 5029196, 3574097, 897934, 601723, 18801310,
9687653, 1360301, 1567582, 12830632, 6483802, 3046355, 2853118,
4339367, 4533372, 1328361, 5773552, 6547629, 9883640, 5303925,
2967297, 5988927, 989415, 1826341, 2700551, 1316470, 8791894,
2059179, 19378102, 9535483, 672591, 11536504, 3751351, 3831074,
12702379, 1052567, 4625364, 814180, 6346105, 25145561, 2763885,
625741, 8001024, 6724540, 1852994, 5686986, 563626, 3725789),
`Estimates Base` = c(308758105, 55318353, 66929825, 114563005,
71946922, 4780131, 710249, 6392301, 2916025, 37254522, 5029324,
3574114, 897936, 601766, 18804592, 9688680, 1360301, 1567650,
12831574, 6484136, 3046869, 2853129, 4339344, 4533479, 1328364,
5773786, 6547813, 9884129, 5303924, 2968103, 5988928, 989414,
1826334, 2700691, 1316461, 8791953, 2059198, 19378110, 9535688,
672591, 11536727, 3751615, 3831072, 12702857, 1052940, 4625410,
814195, 6346298, 25146100, 2763888, 625741, 8001041, 6724545,
1853011, 5687289, 563767, 3726157), `2010` = c(309348193,
55388056, 66978602, 114863114, 72118421, 4785492, 714031,
6408312, 2921995, 37332685, 5048644, 3579899, 899816, 605183,
18849098, 9713521, 1363945, 1571010, 12841578, 6490528, 3050738,
2858850, 4348662, 4544996, 1327730, 5788584, 6565524, 9877495,
5311147, 2970322, 5996118, 990641, 1830051, 2703284, 1316872,
8803729, 2064756, 19402640, 9558915, 674526, 11540983, 3759603,
3838048, 12712343, 1053337, 4635943, 816325, 6356671, 25244310,
2775326, 625982, 8025773, 6743226, 1854230, 5690263, 564513,
3721525), `2011` = c(311663358, 55632766, 67153331, 116061801,
72815460, 4799918, 722713, 6467163, 2939493, 37676861, 5118360,
3589893, 907924, 620477, 19096952, 9811610, 1377864, 1584143,
12860012, 6516480, 3065223, 2869503, 4369354, 4575404, 1328231,
5843603, 6611923, 9876213, 5348562, 2978162, 6010717, 997821,
1842283, 2718379, 1318473, 8841243, 2077756, 19519529, 9650963,
685476, 11544824, 3786274, 3868031, 12744293, 1052451, 4672637,
824398, 6397634, 25646389, 2816124, 626730, 8110035, 6822520,
1854972, 5709640, 567725, 3678732), `2012` = c(313998379,
55829059, 67332320, 117299171, 73537829, 4815960, 731089,
6549634, 2950685, 38011074, 5189867, 3593795, 916993, 635327,
19344156, 9914668, 1391820, 1595911, 12870798, 6537743, 3076310,
2885262, 4384799, 4603429, 1328895, 5889651, 6658008, 9887238,
5380285, 2984945, 6025415, 1005196, 1855725, 2752565, 1321182,
8873211, 2083784, 19602769, 9746175, 702087, 11550839, 3817054,
3899116, 12771854, 1052901, 4720760, 834441, 6454306, 26071655,
2855782, 626444, 8192048, 6895226, 1856560, 5726177, 576765,
3634488), `2013` = c(316204908, 55988771, 67543948, 118424320,
74247869, 4829479, 736879, 6624617, 2958663, 38335203, 5267603,
3596003, 925395, 649165, 19582022, 9984938, 1406481, 1612011,
12879505, 6569102, 3091930, 2892821, 4400477, 4626402, 1329076,
5931129, 6706786, 9898982, 5418521, 2990482, 6042711, 1014314,
1868559, 2786464, 1322687, 8899162, 2085193, 19673546, 9841590,
724019, 11570022, 3852415, 3925751, 12781338, 1053033, 4767894,
844922, 6494821, 26473525, 2902663, 627140, 8262692, 6968006,
1853231, 5742854, 582684, 3593077), `2014` = c(318563456,
56116791, 67726368, 119696311, 75023986, 4843214, 736705,
6719993, 2966912, 38680810, 5349648, 3591873, 934948, 659005,
19888741, 10087231, 1416349, 1633532, 12867544, 6595233,
3108030, 2899360, 4413057, 4647880, 1330719, 5967295, 6749911,
9915767, 5453109, 2992400, 6060930, 1022867, 1881145, 2833013,
1328743, 8925001, 2083024, 19718515, 9934399, 739904, 11594408,
3877499, 3968371, 12790565, 1054480, 4828430, 852561, 6544663,
26944751, 2941836, 626984, 8317372, 7054196, 1848514, 5758377,
583642, 3534874), `2015` = c(320896618, 56184737, 67838387,
121039206, 75834288, 4853875, 737709, 6817565, 2977853, 38993940,
5448819, 3584730, 944076, 670377, 20244914, 10199398, 1425157,
1652828, 12839047, 6612768, 3121997, 2906721, 4424611, 4668960,
1329453, 5994983, 6784240, 9917715, 5482435, 2989390, 6076204,
1032073, 1893765, 2883758, 1330111, 8935421, 2080328, 19747183,
10035186, 756835, 11605090, 3907414, 4024634, 12791904, 1055607,
4894834, 857919, 6595056, 27429639, 2990632, 626088, 8367587,
7160290, 1841053, 5767891, 586555, 3473181), `2016` = c(323127513,
56209510, 67941429, 122319574, 76657000, 4863300, 741894,
6931071, 2988248, 39250017, 5540545, 3576452, 952065, 681170,
20612439, 10310371, 1428557, 1683140, 12801539, 6633053,
3134693, 2907289, 4436974, 4681666, 1331479, 6016447, 6811779,
9928300, 5519952, 2988726, 6093000, 1042520, 1907116, 2940058,
1334795, 8944469, 2081015, 19745289, 10146788, 757952, 11614373,
3923561, 4093465, 12784227, 1056426, 4961119, 865454, 6651194,
27862596, 3051217, 624594, 8411808, 7288000, 1831102, 5778708,
585501, 3411307)), row.names = c(NA, -57L), class = c("tbl_df",
"tbl", "data.frame"))
请帮助我构建一个时间序列对象,以便我可以使用线性回归来预测第 1 列中任意一个州在 2020 年的数值。
【问题讨论】:
标签: r time-series linear-regression