【发布时间】:2022-07-01 00:13:20
【问题描述】:
我有一个包含选举候选人的数据集,我想创建一个新变量,用来指示某选区上一次选举的获胜者是否在同一选区再次参选。换句话说,我想要一个“现任”(incumbent)变量:如果候选人是上一次选举的获胜者并在下一次选举中再次竞选,则其值为 1;如果候选人是新候选人,或者没有赢得上一次选举,则其值为 0。例如,在我的数据集样本中,AJAX-PICKERING 选区的 DICKSON, JOE 赢得了 election_id 为 3900 的选举,并再次参加了 election_id 为 4000 的选举,因此他在 4000 这次选举中应被标记为现任。而在 OSHAWA 选区,从来没有现任者参选,因此所有候选人的值都应为 0。
这是我的数据集示例(`dput()` 的输出):
structure(list(full_name = c("WILLERT, CECILE", "CARVALHO, ANDREW",
"ASHE, KEVIN", "DICKSON, JOE", "THAVARAJASOORIER, BALA", "DELIS, ANDREW",
"TOMAN, STEVEN", "MCCARTHY, TODD", "DICKSON, JOE", "WISEMAN, EVAN",
"NARRAWAY, ADAM", "MCCARTHY, TODD", "DICKSON, JOE", "STEWART, KYLE",
"KING, JERMAINE", "RHODES, BRENDA", "HALL, SARA", "RICHTER, MATT",
"MILLER, NORM", "WATERS, CINDY", "RICHTER, MATT", "MILLER, NORM",
"ZYGANIUK, ALEX", "RICHTER, MATT", "MILLER, NORM", "WATERS, DAN",
"MOBBLEY, CLYDE", "STIVRINS, ANDY", "KEMP, ALEXANDER", "STREUTKER, JEFFREY",
"OUELLETTE, JERRY", "RYAN, SID", "MENEZES, JACQUIE", "LEADBETTER, STACEY",
"BELANGER, MATTHEW", "SHIELDS, MIKE", "FUDGE, BEN", "SMIT, BECKY",
"FRENCH, JENNIFER"), gender = c("female", "male", "male", "male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male", "female", "female", "male", "male", "female",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male", "male", "female", "female", "male", "male",
"male", "female", "female"), gender_manual = c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", ""), gender_probability = c(1, 1, 1, 0.99, 0.92, 1, 1,
1, 0.99, 0.97, 1, 1, 0.99, 0.99, 0.98, 0.99, 0.99, 1, 1, 1, 1,
1, 0.87, 1, 1, 0.98, 0.97, 0.95, 1, 1, 0.98, 0.92, 1, 0.98, 1,
1, 0.99, 1, 1), gender_count = c(427L, 5168L, 5362L, 3679L, 37L,
5168L, 2600L, 847L, 3679L, 657L, 3957L, 847L, 3679L, 1944L, 80L,
1816L, 4435L, 4915L, 60L, 2477L, 4915L, 60L, 5856L, 4915L, 60L,
3240L, 36L, 3139L, 1645L, 932L, 1031L, 71L, 78L, 1034L, 3338L,
5595L, 3363L, 1447L, 6717L), ballots = c(3067L, 368L, 13898L,
19857L, 3275L, 299L, 843L, 14718L, 19606L, 5952L, 1589L, 14999L,
26257L, 301L, 8274L, 9819L, 5015L, 4557L, 17348L, 6537L, 3251L,
19417L, 6527L, 7484L, 15761L, 10158L, 4999L, 296L, 2474L, 253L,
15977L, 13482L, 6921L, 1035L, 435L, 14316L, 147L, 1785L, 22232L
), election_date = c("2007-10-10", "2007-10-10", "2007-10-10",
"2007-10-10", "2007-10-10", "2011-10-06", "2011-10-06", "2011-10-06",
"2011-10-06", "2011-10-06", "2014-06-12", "2014-06-12", "2014-06-12",
"2014-06-12", "2014-06-12", "2007-10-10", "2007-10-10", "2007-10-10",
"2007-10-10", "2011-10-06", "2011-10-06", "2011-10-06", "2011-10-06",
"2014-06-12", "2014-06-12", "2014-06-12", "2014-06-12", "2014-06-12",
"2007-10-10", "2007-10-10", "2007-10-10", "2007-10-10", "2011-10-06",
"2011-10-06", "2011-10-06", "2011-10-06", "2011-10-06", "2014-06-12",
"2014-06-12"), election_id = c(3900L, 3900L, 3900L, 3900L, 3900L,
4000L, 4000L, 4000L, 4000L, 4000L, 4100L, 4100L, 4100L, 4100L,
4100L, 3900L, 3900L, 3900L, 3900L, 4000L, 4000L, 4000L, 4000L,
4100L, 4100L, 4100L, 4100L, 4100L, 3900L, 3900L, 3900L, 3900L,
4000L, 4000L, 4000L, 4000L, 4000L, 4100L, 4100L), party = c("GREEN",
"FAMILY COALITION PARTY OF ONTARIO", "PROGRESSIVE CONSERVATIVE",
"LIBERAL", "NEW DEMOCRATIC", "ONTARIO LIBERTARIAN PARTY", "GREEN",
"PROGRESSIVE CONSERVATIVE", "LIBERAL", "NEW DEMOCRATIC", "THE GREEN PARTY OF ONTARIO",
"PROGRESSIVE CONSERVATIVE PARTY OF ONTARIO", "ONTARIO LIBERAL PARTY",
"ONTARIO LIBERTARIAN PARTY", "NEW DEMOCRATIC PARTY OF ONTARIO",
"LIBERAL", "NEW DEMOCRATIC", "GREEN", "PROGRESSIVE CONSERVATIVE",
"LIBERAL", "GREEN", "PROGRESSIVE CONSERVATIVE", "NEW DEMOCRATIC",
"THE GREEN PARTY OF ONTARIO", "PROGRESSIVE CONSERVATIVE PARTY OF ONTARIO",
"ONTARIO LIBERAL PARTY", "NEW DEMOCRATIC PARTY OF ONTARIO", "FREEDOM",
"GREEN", "FAMILY COALITION PARTY OF ONTARIO", "PROGRESSIVE CONSERVATIVE",
"NEW DEMOCRATIC", "LIBERAL", "GREEN", "ONTARIO LIBERTARIAN PARTY",
"NEW DEMOCRATIC", "FREEDOM", "THE GREEN PARTY OF ONTARIO", "NEW DEMOCRATIC PARTY OF ONTARIO"
), party_code = c("", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), party_manual = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", ""), riding = c("AJAX-PICKERING", "AJAX-PICKERING",
"AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING",
"AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING",
"AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING", "AJAX-PICKERING",
"AJAX-PICKERING", "PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA",
"PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA",
"PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA",
"PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA",
"PARRY SOUND-MUSKOKA", "PARRY SOUND-MUSKOKA", "OSHAWA", "OSHAWA",
"OSHAWA", "OSHAWA", "OSHAWA", "OSHAWA", "OSHAWA", "OSHAWA", "OSHAWA",
"OSHAWA", "OSHAWA"), riding_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 69L, 69L, 69L, 69L, 69L, 69L,
69L, 69L, 69L, 69L, 69L, 69L, 69L, 61L, 61L, 61L, 61L, 61L, 61L,
61L, 61L, 61L, 61L, 61L), vote_share = c(7.57938959594711, 0.909427900654887,
34.345730878537, 49.0720375633263, 8.09341406153466, 0.721908349026993,
2.03534695060119, 35.5352745183254, 47.3369066589406, 14.3705635231059,
3.09023726176585, 29.1695838195255, 51.0637884091793, 0.5853753403345,
16.0910151691949, 26.7263670758594, 13.6503443207491, 12.403712675903,
47.2195759274885, 18.2945259151461, 9.09828724952424, 54.3406470390686,
18.2665397962611, 19.3395007493927, 40.7282030079074, 26.2494185746033,
12.9179802573776, 0.7648974107189, 7.68657180140434, 0.786056049213944,
49.6395948549059, 41.8877772944759, 30.2835389866107, 4.52874770280914,
1.90338671567341, 62.6411131530585, 0.643213441848254, 7.43223549985427,
92.5677645001457), won = c(FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE), winner = c(0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1)), row.names = c(NA,
-39L), groups = structure(list(election_id = c(3900L, 3900L,
3900L, 4000L, 4000L, 4000L, 4100L, 4100L, 4100L), riding_id = c(1L,
61L, 69L, 1L, 61L, 69L, 1L, 61L, 69L), .rows = structure(list(
1:5, 29:32, 16:19, 6:10, 33:37, 20:23, 11:15, 38:39, 24:28), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
谢谢!
【问题讨论】:
标签: r time-series lead